summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--Android.common.mk12
-rw-r--r--VERSION2
-rw-r--r--configure.ac5
-rw-r--r--docs/GL3.txt10
-rw-r--r--docs/envvars.html29
-rw-r--r--docs/install.html3
-rw-r--r--docs/relnotes/11.2.0.html4
-rw-r--r--docs/relnotes/11.3.0.html61
-rw-r--r--include/pci_ids/i965_pci_ids.h18
-rw-r--r--src/compiler/builtin_type_macros.h1
-rw-r--r--src/compiler/glsl/ast.h12
-rw-r--r--src/compiler/glsl/ast_function.cpp163
-rw-r--r--src/compiler/glsl/ast_to_hir.cpp26
-rw-r--r--src/compiler/glsl/ast_type.cpp13
-rw-r--r--src/compiler/glsl/builtin_functions.cpp65
-rw-r--r--src/compiler/glsl/builtin_variables.cpp15
-rw-r--r--src/compiler/glsl/glcpp/glcpp-parse.y6
-rw-r--r--src/compiler/glsl/glsl_lexer.ll4
-rw-r--r--src/compiler/glsl/glsl_parser_extras.cpp32
-rw-r--r--src/compiler/glsl/glsl_parser_extras.h7
-rw-r--r--src/compiler/glsl/ir.cpp17
-rw-r--r--src/compiler/glsl/ir.h11
-rw-r--r--src/compiler/glsl/ir_clone.cpp2
-rw-r--r--src/compiler/glsl/link_uniform_initializers.cpp2
-rw-r--r--src/compiler/glsl/link_uniforms.cpp86
-rw-r--r--src/compiler/glsl/link_varyings.cpp23
-rw-r--r--src/compiler/glsl/linker.cpp70
-rw-r--r--src/compiler/glsl/linker.h17
-rw-r--r--src/compiler/glsl/lower_discard_flow.cpp4
-rw-r--r--src/compiler/glsl/main.cpp17
-rw-r--r--src/compiler/glsl/tests/sampler_types_test.cpp2
-rw-r--r--src/compiler/glsl/tests/uniform_initializer_utils.cpp3
-rw-r--r--src/compiler/glsl_types.cpp14
-rw-r--r--src/compiler/glsl_types.h4
-rw-r--r--src/compiler/nir_types.cpp9
-rw-r--r--src/compiler/nir_types.h1
-rw-r--r--src/egl/Android.mk5
-rw-r--r--src/egl/drivers/dri2/platform_android.c14
-rw-r--r--src/egl/drivers/dri2/platform_x11.c9
-rw-r--r--src/egl/main/eglapi.c10
-rw-r--r--src/egl/main/eglsync.c3
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_context.c270
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_context.h84
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_debug.cpp53
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_init.c2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_limits.h3
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_misc.cpp30
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_misc.h7
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c7
-rw-r--r--src/gallium/auxiliary/hud/font.c2
-rw-r--r--src/gallium/auxiliary/hud/hud_context.c56
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c4
-rw-r--r--src/gallium/auxiliary/postprocess/pp_run.c58
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_build.c1
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump.c7
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c8
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.h3
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.c10
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_strings.c1
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_text.c16
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.c32
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.h3
-rw-r--r--src/gallium/auxiliary/util/u_blit.c52
-rw-r--r--src/gallium/auxiliary/util/u_inlines.h11
-rw-r--r--src/gallium/docs/source/screen.rst3
-rw-r--r--src/gallium/drivers/ddebug/dd_context.c28
-rw-r--r--src/gallium/drivers/ddebug/dd_pipe.h2
-rw-r--r--src/gallium/drivers/freedreno/a2xx/a2xx.xml.h11
-rw-r--r--src/gallium/drivers/freedreno/a3xx/a3xx.xml.h277
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_emit.c3
-rw-r--r--src/gallium/drivers/freedreno/a4xx/a4xx.xml.h1042
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_context.h2
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_emit.c3
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_query.c138
-rw-r--r--src/gallium/drivers/freedreno/adreno_common.xml.h13
-rw-r--r--src/gallium/drivers/freedreno/adreno_pm4.xml.h42
-rw-r--r--src/gallium/drivers/freedreno/freedreno_context.h3
-rw-r--r--src/gallium/drivers/freedreno/freedreno_query.h12
-rw-r--r--src/gallium/drivers/freedreno/freedreno_query_hw.c33
-rw-r--r--src/gallium/drivers/freedreno/freedreno_query_hw.h6
-rw-r--r--src/gallium/drivers/freedreno/freedreno_screen.c17
-rw-r--r--src/gallium/drivers/freedreno/freedreno_screen.h1
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c1
-rw-r--r--src/gallium/drivers/ilo/ilo_gpgpu.c8
-rw-r--r--src/gallium/drivers/ilo/ilo_screen.c2
-rw-r--r--src/gallium/drivers/ilo/ilo_state.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast.c8
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_context.h4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_line.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_tri.c2
-rw-r--r--src/gallium/drivers/nouveau/Makefile.sources5
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir.h2
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp8
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp61
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp5
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp47
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp41
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp103
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h1
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp35
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp10
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp2
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp1
-rw-r--r--src/gallium/drivers/nouveau/nv30/nv30_screen.c2
-rw-r--r--src/gallium/drivers/nouveau/nv50/g80_defs.xml.h279
-rw-r--r--src/gallium/drivers/nouveau/nv50/g80_texture.xml.h451
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_compute.c16
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_context.h4
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_defs.xml.h263
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_formats.c744
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c12
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_screen.c9
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_screen.h18
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_state.c69
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_state_validate.c1
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h1
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_surface.c34
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_tex.c92
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_texture.xml.h306
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_transfer.c6
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_vbo.c11
-rw-r--r--src/gallium/drivers/nouveau/nv_object.xml.h2
-rw-r--r--src/gallium/drivers/nouveau/nvc0/gm107_texture.xml.h365
-rw-r--r--src/gallium/drivers/nouveau/nvc0/mme/Makefile2
-rw-r--r--src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme24
-rw-r--r--src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h19
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_compute.c287
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_compute.h1
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_context.c60
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_context.h97
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_macros.h2
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_program.c3
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c40
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_screen.c51
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_screen.h21
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c12
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_state.c104
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c144
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_surface.c85
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_tex.c387
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c2
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c44
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c6
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h6
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nve4_compute.c124
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nve4_compute.h1
-rw-r--r--src/gallium/drivers/r300/r300_screen.c6
-rw-r--r--src/gallium/drivers/r600/evergreen_compute.c15
-rw-r--r--src/gallium/drivers/r600/r600_llvm.c4
-rw-r--r--src/gallium/drivers/r600/r600_pipe.c3
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.c1
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.h3
-rw-r--r--src/gallium/drivers/radeon/r600_texture.c11
-rw-r--r--src/gallium/drivers/radeon/radeon_elf_util.c26
-rw-r--r--src/gallium/drivers/radeon/radeon_elf_util.h14
-rw-r--r--src/gallium/drivers/radeon/radeon_llvm.h7
-rw-r--r--src/gallium/drivers/radeon/radeon_llvm_util.c6
-rw-r--r--src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c31
-rw-r--r--src/gallium/drivers/radeonsi/si_compute.c31
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.c35
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.h26
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.c1784
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.h155
-rw-r--r--src/gallium/drivers/radeonsi/si_state.c2
-rw-r--r--src/gallium/drivers/radeonsi/si_state.h2
-rw-r--r--src/gallium/drivers/radeonsi/si_state_shaders.c383
-rw-r--r--src/gallium/drivers/svga/svga_draw.c10
-rw-r--r--src/gallium/drivers/svga/svga_screen.c9
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_vgpu10.c4
-rw-r--r--src/gallium/drivers/trace/tr_context.c228
-rw-r--r--src/gallium/drivers/trace/tr_dump_state.c60
-rw-r--r--src/gallium/drivers/trace/tr_dump_state.h4
-rw-r--r--src/gallium/drivers/trace/tr_screen.c25
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.c1
-rw-r--r--src/gallium/drivers/vc4/vc4_qir_schedule.c2
-rw-r--r--src/gallium/drivers/vc4/vc4_qpu_emit.c2
-rw-r--r--src/gallium/drivers/vc4/vc4_screen.c3
-rw-r--r--src/gallium/include/pipe/p_context.h31
-rw-r--r--src/gallium/include/pipe/p_defines.h2
-rw-r--r--src/gallium/include/pipe/p_shader_tokens.h4
-rw-r--r--src/gallium/include/pipe/p_state.h43
-rw-r--r--src/gallium/state_trackers/clover/core/kernel.cpp13
-rw-r--r--src/gallium/state_trackers/nine/Makefile.sources1
-rw-r--r--src/gallium/state_trackers/nine/adapter9.c44
-rw-r--r--src/gallium/state_trackers/nine/adapter9.h18
-rw-r--r--src/gallium/state_trackers/nine/authenticatedchannel9.c10
-rw-r--r--src/gallium/state_trackers/nine/authenticatedchannel9.h10
-rw-r--r--src/gallium/state_trackers/nine/basetexture9.c14
-rw-r--r--src/gallium/state_trackers/nine/basetexture9.h14
-rw-r--r--src/gallium/state_trackers/nine/buffer9.c81
-rw-r--r--src/gallium/state_trackers/nine/buffer9.h32
-rw-r--r--src/gallium/state_trackers/nine/cryptosession9.c18
-rw-r--r--src/gallium/state_trackers/nine/cryptosession9.h18
-rw-r--r--src/gallium/state_trackers/nine/cubetexture9.c25
-rw-r--r--src/gallium/state_trackers/nine/cubetexture9.h10
-rw-r--r--src/gallium/state_trackers/nine/device9.c261
-rw-r--r--src/gallium/state_trackers/nine/device9.h234
-rw-r--r--src/gallium/state_trackers/nine/device9ex.c34
-rw-r--r--src/gallium/state_trackers/nine/device9ex.h36
-rw-r--r--src/gallium/state_trackers/nine/device9video.c6
-rw-r--r--src/gallium/state_trackers/nine/device9video.h6
-rw-r--r--src/gallium/state_trackers/nine/indexbuffer9.c6
-rw-r--r--src/gallium/state_trackers/nine/indexbuffer9.h6
-rw-r--r--src/gallium/state_trackers/nine/iunknown.c8
-rw-r--r--src/gallium/state_trackers/nine/iunknown.h9
-rw-r--r--src/gallium/state_trackers/nine/nine_ff.c32
-rw-r--r--src/gallium/state_trackers/nine/nine_flags.h14
-rw-r--r--src/gallium/state_trackers/nine/nine_lock.c444
-rw-r--r--src/gallium/state_trackers/nine/nine_pipe.h8
-rw-r--r--src/gallium/state_trackers/nine/nine_state.c12
-rw-r--r--src/gallium/state_trackers/nine/nineexoverlayextension.c2
-rw-r--r--src/gallium/state_trackers/nine/nineexoverlayextension.h2
-rw-r--r--src/gallium/state_trackers/nine/pixelshader9.c2
-rw-r--r--src/gallium/state_trackers/nine/pixelshader9.h2
-rw-r--r--src/gallium/state_trackers/nine/query9.c8
-rw-r--r--src/gallium/state_trackers/nine/query9.h8
-rw-r--r--src/gallium/state_trackers/nine/resource9.c14
-rw-r--r--src/gallium/state_trackers/nine/resource9.h14
-rw-r--r--src/gallium/state_trackers/nine/stateblock9.c44
-rw-r--r--src/gallium/state_trackers/nine/stateblock9.h4
-rw-r--r--src/gallium/state_trackers/nine/surface9.c26
-rw-r--r--src/gallium/state_trackers/nine/surface9.h12
-rw-r--r--src/gallium/state_trackers/nine/swapchain9.c20
-rw-r--r--src/gallium/state_trackers/nine/swapchain9.h12
-rw-r--r--src/gallium/state_trackers/nine/swapchain9ex.c6
-rw-r--r--src/gallium/state_trackers/nine/swapchain9ex.h6
-rw-r--r--src/gallium/state_trackers/nine/texture9.c48
-rw-r--r--src/gallium/state_trackers/nine/texture9.h10
-rw-r--r--src/gallium/state_trackers/nine/vertexbuffer9.c6
-rw-r--r--src/gallium/state_trackers/nine/vertexbuffer9.h6
-rw-r--r--src/gallium/state_trackers/nine/vertexdeclaration9.c2
-rw-r--r--src/gallium/state_trackers/nine/vertexdeclaration9.h2
-rw-r--r--src/gallium/state_trackers/nine/vertexshader9.c2
-rw-r--r--src/gallium/state_trackers/nine/vertexshader9.h2
-rw-r--r--src/gallium/state_trackers/nine/volume9.c20
-rw-r--r--src/gallium/state_trackers/nine/volume9.h14
-rw-r--r--src/gallium/state_trackers/nine/volumetexture9.c23
-rw-r--r--src/gallium/state_trackers/nine/volumetexture9.h10
-rw-r--r--src/gallium/tests/trivial/compute.c11
-rw-r--r--src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c1
-rw-r--r--src/mapi/glapi/gen/EXT_texture_integer.xml8
-rw-r--r--src/mapi/glapi/gen/es_EXT.xml88
-rw-r--r--src/mesa/Makefile.sources3
-rw-r--r--src/mesa/drivers/common/meta_copy_image.c2
-rw-r--r--src/mesa/drivers/dri/i915/intel_tex_validate.c1
-rw-r--r--src/mesa/drivers/dri/i965/Makefile.sources1
-rw-r--r--src/mesa/drivers/dri/i965/brw_blorp_blit.cpp7
-rw-r--r--src/mesa/drivers/dri/i965/brw_compute.c96
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.c23
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_device_info.c19
-rw-r--r--src/mesa/drivers/dri/i965/brw_draw.c18
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp39
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_builder.h12
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_nir.cpp20
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_meta_fast_clear.c9
-rw-r--r--src/mesa/drivers/dri/i965/brw_state.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_cache.c23
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_upload.c4
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.cpp50
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_builder.h17
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp14
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_surface_state.c43
-rw-r--r--src/mesa/drivers/dri/i965/gen6_constant_state.c190
-rw-r--r--src/mesa/drivers/dri/i965/gen6_vs_state.c89
-rw-r--r--src/mesa/drivers/dri/i965/gen7_l3_state.c7
-rw-r--r--src/mesa/drivers/dri/i965/gen7_vs_state.c75
-rw-r--r--src/mesa/drivers/dri/i965/gen8_surface_state.c62
-rw-r--r--src/mesa/drivers/dri/i965/intel_blit.c4
-rw-r--r--src/mesa/drivers/dri/i965/intel_copy_image.c4
-rw-r--r--src/mesa/drivers/dri/i965/intel_mipmap_tree.c217
-rw-r--r--src/mesa/drivers/dri/i965/intel_mipmap_tree.h24
-rw-r--r--src/mesa/drivers/dri/i965/intel_pixel_bitmap.c2
-rw-r--r--src/mesa/drivers/dri/i965/intel_pixel_read.c2
-rw-r--r--src/mesa/drivers/dri/i965/intel_tex_image.c2
-rw-r--r--src/mesa/drivers/dri/i965/intel_tex_subimage.c2
-rw-r--r--src/mesa/drivers/dri/i965/intel_tex_validate.c1
-rw-r--r--src/mesa/main/attrib.c4
-rw-r--r--src/mesa/main/bufferobj.c4
-rw-r--r--src/mesa/main/buffers.c65
-rw-r--r--src/mesa/main/buffers.h14
-rw-r--r--src/mesa/main/clear.c87
-rw-r--r--src/mesa/main/compute.c3
-rw-r--r--src/mesa/main/config.h11
-rw-r--r--src/mesa/main/context.c8
-rw-r--r--src/mesa/main/dd.h9
-rw-r--r--src/mesa/main/dlist.c385
-rw-r--r--src/mesa/main/dlist.h38
-rw-r--r--src/mesa/main/enable.c4
-rw-r--r--src/mesa/main/extensions_table.h4
-rw-r--r--src/mesa/main/fbobject.c5
-rw-r--r--src/mesa/main/format_utils.c74
-rw-r--r--src/mesa/main/formatquery.c5
-rw-r--r--src/mesa/main/genmipmap.c2
-rw-r--r--src/mesa/main/get.c2
-rw-r--r--src/mesa/main/get_hash_params.py14
-rw-r--r--src/mesa/main/glformats.c8
-rw-r--r--src/mesa/main/image.c11
-rw-r--r--src/mesa/main/imports.c8
-rw-r--r--src/mesa/main/imports.h3
-rw-r--r--src/mesa/main/mipmap.c21
-rw-r--r--src/mesa/main/mtypes.h17
-rw-r--r--src/mesa/main/pipelineobj.c5
-rw-r--r--src/mesa/main/program_resource.c4
-rw-r--r--src/mesa/main/readpix.c2
-rw-r--r--src/mesa/main/samplerobj.c6
-rw-r--r--src/mesa/main/shaderapi.c1
-rw-r--r--src/mesa/main/shared.c15
-rw-r--r--src/mesa/main/tests/dispatch_sanity.cpp10
-rw-r--r--src/mesa/main/texgetimage.c14
-rw-r--r--src/mesa/main/teximage.c50
-rw-r--r--src/mesa/main/teximage.h60
-rw-r--r--src/mesa/main/texobj.c33
-rw-r--r--src/mesa/main/texobj.h19
-rw-r--r--src/mesa/main/texparam.c34
-rw-r--r--src/mesa/main/texstate.c2
-rw-r--r--src/mesa/main/texstore.c3
-rw-r--r--src/mesa/main/textureview.c9
-rw-r--r--src/mesa/program/prog_statevars.c4
-rw-r--r--src/mesa/state_tracker/st_atom.c60
-rw-r--r--src/mesa/state_tracker/st_atom.h17
-rw-r--r--src/mesa/state_tracker/st_atom_atomicbuf.c26
-rw-r--r--src/mesa/state_tracker/st_atom_constbuf.c46
-rw-r--r--src/mesa/state_tracker/st_atom_image.c236
-rw-r--r--src/mesa/state_tracker/st_atom_sampler.c8
-rw-r--r--src/mesa/state_tracker/st_atom_shader.c32
-rw-r--r--src/mesa/state_tracker/st_atom_storagebuf.c21
-rw-r--r--src/mesa/state_tracker/st_atom_texture.c62
-rw-r--r--src/mesa/state_tracker/st_cb_bitmap.c266
-rw-r--r--src/mesa/state_tracker/st_cb_clear.c162
-rw-r--r--src/mesa/state_tracker/st_cb_compute.c85
-rw-r--r--src/mesa/state_tracker/st_cb_compute.h38
-rw-r--r--src/mesa/state_tracker/st_cb_drawpixels.c305
-rw-r--r--src/mesa/state_tracker/st_cb_drawtex.c62
-rw-r--r--src/mesa/state_tracker/st_cb_fbo.c13
-rw-r--r--src/mesa/state_tracker/st_cb_msaa.c2
-rw-r--r--src/mesa/state_tracker/st_cb_program.c30
-rw-r--r--src/mesa/state_tracker/st_cb_rasterpos.c2
-rw-r--r--src/mesa/state_tracker/st_cb_readpixels.c6
-rw-r--r--src/mesa/state_tracker/st_cb_texture.c142
-rw-r--r--src/mesa/state_tracker/st_context.c56
-rw-r--r--src/mesa/state_tracker/st_context.h35
-rw-r--r--src/mesa/state_tracker/st_draw.c96
-rw-r--r--src/mesa/state_tracker/st_draw.h7
-rw-r--r--src/mesa/state_tracker/st_draw_feedback.c2
-rw-r--r--src/mesa/state_tracker/st_extensions.c69
-rw-r--r--src/mesa/state_tracker/st_format.c42
-rw-r--r--src/mesa/state_tracker/st_format.h8
-rw-r--r--src/mesa/state_tracker/st_glsl_to_tgsi.cpp338
-rw-r--r--src/mesa/state_tracker/st_manager.c1
-rw-r--r--src/mesa/state_tracker/st_program.c105
-rw-r--r--src/mesa/state_tracker/st_program.h41
-rw-r--r--src/util/hash_table.h4
-rw-r--r--src/util/u_atomic.h59
358 files changed, 12445 insertions, 4764 deletions
diff --git a/Android.common.mk b/Android.common.mk
index 948561c6b1b..c4823f7dbf1 100644
--- a/Android.common.mk
+++ b/Android.common.mk
@@ -21,13 +21,8 @@
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
-# use c99 compiler by default
-ifeq ($(LOCAL_CC),)
ifeq ($(LOCAL_IS_HOST_MODULE),true)
-LOCAL_CC := $(HOST_CC) -std=c99 -D_GNU_SOURCE
-else
-LOCAL_CC := $(TARGET_CC) -std=c99
-endif
+LOCAL_CFLAGS += -D_GNU_SOURCE
endif
LOCAL_C_INCLUDES += \
@@ -37,6 +32,7 @@ LOCAL_C_INCLUDES += \
MESA_VERSION := $(shell cat $(MESA_TOP)/VERSION)
# define ANDROID_VERSION (e.g., 4.0.x => 0x0400)
LOCAL_CFLAGS += \
+ -Wno-unused-parameter \
-DPACKAGE_VERSION=\"$(MESA_VERSION)\" \
-DPACKAGE_BUGREPORT=\"https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa\" \
-DANDROID_VERSION=0x0$(MESA_ANDROID_MAJOR_VERSION)0$(MESA_ANDROID_MINOR_VERSION)
@@ -60,6 +56,10 @@ LOCAL_CFLAGS += \
-fvisibility=hidden \
-Wno-sign-compare
+# mesa requires at least c99 compiler
+LOCAL_CONLYFLAGS += \
+ -std=c99
+
ifeq ($(strip $(MESA_ENABLE_ASM)),true)
ifeq ($(TARGET_ARCH),x86)
LOCAL_CFLAGS += \
diff --git a/VERSION b/VERSION
index 96cb83954d3..677ed996402 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-11.2.0-devel
+11.3.0-devel
diff --git a/configure.ac b/configure.ac
index b4e2539e66f..5f686f5602a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -74,7 +74,7 @@ LIBDRM_AMDGPU_REQUIRED=2.4.63
LIBDRM_INTEL_REQUIRED=2.4.61
LIBDRM_NVVIEUX_REQUIRED=2.4.66
LIBDRM_NOUVEAU_REQUIRED=2.4.66
-LIBDRM_FREEDRENO_REQUIRED=2.4.65
+LIBDRM_FREEDRENO_REQUIRED=2.4.67
DRI2PROTO_REQUIRED=2.6
DRI3PROTO_REQUIRED=1.0
PRESENTPROTO_REQUIRED=1.0
@@ -2297,6 +2297,9 @@ dnl in LLVM_LIBS.
if test "x$MESA_LLVM" != x0; then
+ if ! $LLVM_CONFIG --libs ${LLVM_COMPONENTS} >/dev/null; then
+ AC_MSG_ERROR([Calling ${LLVM_CONFIG} failed])
+ fi
LLVM_LIBS="`$LLVM_CONFIG --libs ${LLVM_COMPONENTS}`"
dnl llvm-config may not give the right answer when llvm is a built as a
diff --git a/docs/GL3.txt b/docs/GL3.txt
index e2aa52cc388..8c043e9b5ff 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -215,10 +215,10 @@ GLES3.1, GLSL ES 3.1
GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL)
GL_ARB_framebuffer_no_attachments DONE (i965)
GL_ARB_program_interface_query DONE (all drivers)
- GL_ARB_shader_atomic_counters DONE (i965)
+ GL_ARB_shader_atomic_counters DONE (i965, nvc0)
GL_ARB_shader_image_load_store DONE (i965)
GL_ARB_shader_image_size DONE (i965)
- GL_ARB_shader_storage_buffer_object DONE (i965)
+ GL_ARB_shader_storage_buffer_object DONE (i965, nvc0)
GL_ARB_shading_language_packing DONE (all drivers)
GL_ARB_separate_shader_objects DONE (all drivers)
GL_ARB_stencil_texturing DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
@@ -249,14 +249,14 @@ GLES3.2, GLSL ES 3.2
GL_OES_primitive_bounding box not started
GL_OES_sample_shading not started (based on parts of GL_ARB_sample_shading, which is done for some drivers)
GL_OES_sample_variables not started (based on parts of GL_ARB_sample_shading, which is done for some drivers)
- GL_OES_shader_image_atomic not started (based on parts of GL_ARB_shader_image_load_store, which is done for some drivers)
+ GL_OES_shader_image_atomic DONE (all drivers that support GL_ARB_shader_image_load_store)
GL_OES_shader_io_blocks not started (based on parts of GLSL 1.50, which is done)
GL_OES_shader_multisample_interpolation not started (based on parts of GL_ARB_gpu_shader5, which is done)
GL_OES_tessellation_shader not started (based on GL_ARB_tessellation_shader, which is done for some drivers)
- GL_OES_texture_border_clamp not started (based on GL_ARB_texture_border_clamp, which is done)
+ GL_OES_texture_border_clamp DONE (all drivers)
GL_OES_texture_buffer not started (based on GL_ARB_texture_buffer_object, GL_ARB_texture_buffer_range, and GL_ARB_texture_buffer_object_rgb32 that are all done)
GL_OES_texture_cube_map_array not started (based on GL_ARB_texture_cube_map_array, which is done for all drivers)
- GL_OES_texture_stencil8 not started (based on GL_ARB_texture_stencil8, which is done for some drivers)
+ GL_OES_texture_stencil8 DONE (all drivers that support GL_ARB_texture_stencil8)
GL_OES_texture_storage_multisample_2d_array DONE (all drivers that support GL_ARB_texture_multisample)
More info about these features and the work involved can be found at
diff --git a/docs/envvars.html b/docs/envvars.html
index ba83335d0b0..06aa0ac9369 100644
--- a/docs/envvars.html
+++ b/docs/envvars.html
@@ -91,6 +91,14 @@ This is only valid for versions >= 3.0.
<li> Mesa may not really implement all the features of the given version.
(for developers only)
</ul>
+<li>MESA_GLES_VERSION_OVERRIDE - changes the value returned by
+glGetString(GL_VERSION) for OpenGL ES.
+<ul>
+<li> The format should be MAJOR.MINOR
+<li> Examples: 2.0, 3.0, 3.1
+<li> Mesa may not really implement all the features of the given version.
+(for developers only)
+</ul>
<li>MESA_GLSL_VERSION_OVERRIDE - changes the value returned by
glGetString(GL_SHADING_LANGUAGE_VERSION). Valid values are integers, such as
"130". Mesa will not really implement all the features of the given language version
@@ -224,7 +232,7 @@ See src/mesa/state_tracker/st_debug.c for other options.
<li>LP_PERF - a comma-separated list of options to selectively no-op various
parts of the driver. See the source code for details.
<li>LP_NUM_THREADS - an integer indicating how many threads to use for rendering.
- Zero turns of threading completely. The default value is the number of CPU
+ Zero turns off threading completely. The default value is the number of CPU
cores present.
</ul>
@@ -245,6 +253,25 @@ for details.
</ul>
+<h3>VC4 driver environment variables</h3>
+<ul>
+<li>VC4_DEBUG - a comma-separated list of named flags, which do various things:
+<ul>
+ <li>cl - dump command list during creation</li>
+ <li>qpu - dump generated QPU instructions</li>
+ <li>qir - dump QPU IR during program compile</li>
+ <li>nir - dump NIR during program compile</li>
+ <li>tgsi - dump TGSI during program compile</li>
+ <li>shaderdb - dump program compile information for shader-db analysis</li>
+ <li>perf - print during performance-related events</li>
+ <li>norast - skip actual hardware execution of commands</li>
+ <li>always_flush - flush after each draw call</li>
+ <li>always_sync - wait for finish after each flush</li>
+ <li>dump - write a GPU command stream trace file (VC4 simulator only)</li>
+</ul>
+</ul>
+
+
<p>
Other Gallium drivers have their own environment variables. These may change
frequently so the source code should be consulted for details.
diff --git a/docs/install.html b/docs/install.html
index c826d642001..ae911d5347b 100644
--- a/docs/install.html
+++ b/docs/install.html
@@ -58,6 +58,9 @@ On Windows with MinGW, install flex and bison with:
For MSVC on Windows, install
<a href="http://winflexbison.sourceforge.net/">Win flex-bison</a>.
</li>
+<br>
+<li>For building on Windows, Microsoft Visual Studio 2013 or later is required.
+</li>
</ul>
diff --git a/docs/relnotes/11.2.0.html b/docs/relnotes/11.2.0.html
index 069eca2b70c..baedb4ca756 100644
--- a/docs/relnotes/11.2.0.html
+++ b/docs/relnotes/11.2.0.html
@@ -74,6 +74,10 @@ TBD.
<h2>Changes</h2>
+Microsoft Visual Studio 2013 or later is now required for building
+on Windows.
+Previously, Visual Studio 2008 and later were supported.
+
TBD.
</div>
diff --git a/docs/relnotes/11.3.0.html b/docs/relnotes/11.3.0.html
new file mode 100644
index 00000000000..fa650830e23
--- /dev/null
+++ b/docs/relnotes/11.3.0.html
@@ -0,0 +1,61 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+ <meta http-equiv="content-type" content="text/html; charset=utf-8">
+ <title>Mesa Release Notes</title>
+ <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+ <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 11.3.0 Release Notes / TBD</h1>
+
+<p>
+Mesa 11.3.0 is a new development release.
+People who are concerned with stability and reliability should stick
+with a previous release or wait for Mesa 11.3.1.
+</p>
+<p>
+Mesa 11.3.0 implements the OpenGL 4.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.1. OpenGL
+4.1 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+TBD.
+</pre>
+
+
+<h2>New features</h2>
+
+<p>
+Note: some of the new features are only available with certain drivers.
+</p>
+
+<ul>
+<li>GL_OES_texture_border_clamp and GL_EXT_texture_border_clamp on all drivers that support GL_ARB_texture_border_clamp</li>
+<li>GL_OES_shader_image_atomic on all drivers that support GL_ARB_shader_image_load_store</li>
+</ul>
+
+<h2>Bug fixes</h2>
+
+TBD.
+
+<h2>Changes</h2>
+
+TBD.
+
+</div>
+</body>
+</html>
diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h
index 5139e279bcc..bdfbefe0b75 100644
--- a/include/pci_ids/i965_pci_ids.h
+++ b/include/pci_ids/i965_pci_ids.h
@@ -112,6 +112,7 @@ CHIPSET(0x162E, bdw_gt3, "Intel(R) Broadwell GT3")
CHIPSET(0x1902, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)")
CHIPSET(0x1906, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)")
CHIPSET(0x190A, skl_gt1, "Intel(R) Skylake GT1")
+CHIPSET(0x190B, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)")
CHIPSET(0x190E, skl_gt1, "Intel(R) Skylake GT1")
CHIPSET(0x1912, skl_gt2, "Intel(R) HD Graphics 530 (Skylake GT2)")
CHIPSET(0x1913, skl_gt2, "Intel(R) Skylake GT2f")
@@ -122,16 +123,17 @@ CHIPSET(0x191A, skl_gt2, "Intel(R) Skylake GT2")
CHIPSET(0x191B, skl_gt2, "Intel(R) HD Graphics 530 (Skylake GT2)")
CHIPSET(0x191D, skl_gt2, "Intel(R) HD Graphics P530 (Skylake GT2)")
CHIPSET(0x191E, skl_gt2, "Intel(R) HD Graphics 515 (Skylake GT2)")
-CHIPSET(0x1921, skl_gt2, "Intel(R) Skylake GT2")
-CHIPSET(0x1923, skl_gt3, "Intel(R) Iris Graphics 540 (Skylake GT3e)")
-CHIPSET(0x1926, skl_gt3, "Intel(R) HD Graphics 535 (Skylake GT3)")
+CHIPSET(0x1921, skl_gt2, "Intel(R) HD Graphics 520 (Skylake GT2)")
+CHIPSET(0x1923, skl_gt3, "Intel(R) Skylake GT3e")
+CHIPSET(0x1926, skl_gt3, "Intel(R) Iris Graphics 540 (Skylake GT3e)")
CHIPSET(0x1927, skl_gt3, "Intel(R) Iris Graphics 550 (Skylake GT3e)")
CHIPSET(0x192A, skl_gt4, "Intel(R) Skylake GT4")
-CHIPSET(0x192B, skl_gt3, "Intel(R) Iris Graphics (Skylake GT3fe)")
-CHIPSET(0x1932, skl_gt4, "Intel(R) Skylake GT4")
-CHIPSET(0x193A, skl_gt4, "Intel(R) Skylake GT4")
-CHIPSET(0x193B, skl_gt4, "Intel(R) Skylake GT4")
-CHIPSET(0x193D, skl_gt4, "Intel(R) Skylake GT4")
+CHIPSET(0x192B, skl_gt3, "Intel(R) Iris Graphics 555 (Skylake GT3e)")
+CHIPSET(0x192D, skl_gt3, "Intel(R) Iris Graphics P555 (Skylake GT3e)")
+CHIPSET(0x1932, skl_gt4, "Intel(R) Iris Pro Graphics 580 (Skylake GT4e)")
+CHIPSET(0x193A, skl_gt4, "Intel(R) Iris Pro Graphics P580 (Skylake GT4e)")
+CHIPSET(0x193B, skl_gt4, "Intel(R) Iris Pro Graphics 580 (Skylake GT4e)")
+CHIPSET(0x193D, skl_gt4, "Intel(R) Iris Pro Graphics P580 (Skylake GT4e)")
CHIPSET(0x5902, kbl_gt1, "Intel(R) Kabylake GT1")
CHIPSET(0x5906, kbl_gt1, "Intel(R) Kabylake GT1")
CHIPSET(0x590A, kbl_gt1, "Intel(R) Kabylake GT1")
diff --git a/src/compiler/builtin_type_macros.h b/src/compiler/builtin_type_macros.h
index 7bd2e4e6558..da3f19e7ab2 100644
--- a/src/compiler/builtin_type_macros.h
+++ b/src/compiler/builtin_type_macros.h
@@ -78,6 +78,7 @@ DECL_TYPE(dmat3x4, GL_DOUBLE_MAT3x4, GLSL_TYPE_DOUBLE, 4, 3)
DECL_TYPE(dmat4x2, GL_DOUBLE_MAT4x2, GLSL_TYPE_DOUBLE, 2, 4)
DECL_TYPE(dmat4x3, GL_DOUBLE_MAT4x3, GLSL_TYPE_DOUBLE, 3, 4)
+DECL_TYPE(sampler, GL_SAMPLER_1D, GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_1D, 0, 0, GLSL_TYPE_VOID)
DECL_TYPE(sampler1D, GL_SAMPLER_1D, GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_1D, 0, 0, GLSL_TYPE_FLOAT)
DECL_TYPE(sampler2D, GL_SAMPLER_2D, GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_2D, 0, 0, GLSL_TYPE_FLOAT)
DECL_TYPE(sampler3D, GL_SAMPLER_3D, GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_3D, 0, 0, GLSL_TYPE_FLOAT)
diff --git a/src/compiler/glsl/ast.h b/src/compiler/glsl/ast.h
index 03df6c08b2b..9aa5bb99f49 100644
--- a/src/compiler/glsl/ast.h
+++ b/src/compiler/glsl/ast.h
@@ -685,18 +685,6 @@ struct ast_type_qualifier {
*/
bool has_auxiliary_storage() const;
- /**
- * \brief Return string representation of interpolation qualifier.
- *
- * If an interpolation qualifier is present, then return that qualifier's
- * string representation. Otherwise, return null. For example, if the
- * noperspective bit is set, then this returns "noperspective".
- *
- * If multiple interpolation qualifiers are somehow present, then the
- * returned string is undefined but not null.
- */
- const char *interpolation_string() const;
-
bool merge_qualifier(YYLTYPE *loc,
_mesa_glsl_parse_state *state,
const ast_type_qualifier &q,
diff --git a/src/compiler/glsl/ast_function.cpp b/src/compiler/glsl/ast_function.cpp
index c7fdcb24379..1a440203cfc 100644
--- a/src/compiler/glsl/ast_function.cpp
+++ b/src/compiler/glsl/ast_function.cpp
@@ -1405,9 +1405,9 @@ emit_inline_matrix_constructor(const glsl_type *type,
zero.d[i] = 0.0;
ir_instruction *inst =
- new(ctx) ir_assignment(new(ctx) ir_dereference_variable(rhs_var),
- new(ctx) ir_constant(rhs_var->type, &zero),
- NULL);
+ new(ctx) ir_assignment(new(ctx) ir_dereference_variable(rhs_var),
+ new(ctx) ir_constant(rhs_var->type, &zero),
+ NULL);
instructions->push_tail(inst);
ir_dereference *const rhs_ref = new(ctx) ir_dereference_variable(rhs_var);
@@ -1422,36 +1422,36 @@ emit_inline_matrix_constructor(const glsl_type *type,
* columns than rows).
*/
static const unsigned rhs_swiz[4][4] = {
- { 0, 1, 1, 1 },
- { 1, 0, 1, 1 },
- { 1, 1, 0, 1 },
- { 1, 1, 1, 0 }
+ { 0, 1, 1, 1 },
+ { 1, 0, 1, 1 },
+ { 1, 1, 0, 1 },
+ { 1, 1, 1, 0 }
};
const unsigned cols_to_init = MIN2(type->matrix_columns,
- type->vector_elements);
+ type->vector_elements);
for (unsigned i = 0; i < cols_to_init; i++) {
- ir_constant *const col_idx = new(ctx) ir_constant(i);
- ir_rvalue *const col_ref = new(ctx) ir_dereference_array(var, col_idx);
+ ir_constant *const col_idx = new(ctx) ir_constant(i);
+ ir_rvalue *const col_ref = new(ctx) ir_dereference_array(var, col_idx);
- ir_rvalue *const rhs_ref = new(ctx) ir_dereference_variable(rhs_var);
- ir_rvalue *const rhs = new(ctx) ir_swizzle(rhs_ref, rhs_swiz[i],
- type->vector_elements);
+ ir_rvalue *const rhs_ref = new(ctx) ir_dereference_variable(rhs_var);
+ ir_rvalue *const rhs = new(ctx) ir_swizzle(rhs_ref, rhs_swiz[i],
+ type->vector_elements);
- inst = new(ctx) ir_assignment(col_ref, rhs, NULL);
- instructions->push_tail(inst);
+ inst = new(ctx) ir_assignment(col_ref, rhs, NULL);
+ instructions->push_tail(inst);
}
for (unsigned i = cols_to_init; i < type->matrix_columns; i++) {
- ir_constant *const col_idx = new(ctx) ir_constant(i);
- ir_rvalue *const col_ref = new(ctx) ir_dereference_array(var, col_idx);
+ ir_constant *const col_idx = new(ctx) ir_constant(i);
+ ir_rvalue *const col_ref = new(ctx) ir_dereference_array(var, col_idx);
- ir_rvalue *const rhs_ref = new(ctx) ir_dereference_variable(rhs_var);
- ir_rvalue *const rhs = new(ctx) ir_swizzle(rhs_ref, 1, 1, 1, 1,
- type->vector_elements);
+ ir_rvalue *const rhs_ref = new(ctx) ir_dereference_variable(rhs_var);
+ ir_rvalue *const rhs = new(ctx) ir_swizzle(rhs_ref, 1, 1, 1, 1,
+ type->vector_elements);
- inst = new(ctx) ir_assignment(col_ref, rhs, NULL);
- instructions->push_tail(inst);
+ inst = new(ctx) ir_assignment(col_ref, rhs, NULL);
+ instructions->push_tail(inst);
}
} else if (first_param->type->is_matrix()) {
/* From page 50 (56 of the PDF) of the GLSL 1.50 spec:
@@ -1469,36 +1469,43 @@ emit_inline_matrix_constructor(const glsl_type *type,
/* If the source matrix is smaller, pre-initialize the relavent parts of
* the destination matrix to the identity matrix.
*/
- if ((src_matrix->type->matrix_columns < var->type->matrix_columns)
- || (src_matrix->type->vector_elements < var->type->vector_elements)) {
-
- /* If the source matrix has fewer rows, every column of the destination
- * must be initialized. Otherwise only the columns in the destination
- * that do not exist in the source must be initialized.
- */
- unsigned col =
- (src_matrix->type->vector_elements < var->type->vector_elements)
- ? 0 : src_matrix->type->matrix_columns;
-
- const glsl_type *const col_type = var->type->column_type();
- for (/* empty */; col < var->type->matrix_columns; col++) {
- ir_constant_data ident;
+ if ((src_matrix->type->matrix_columns < var->type->matrix_columns) ||
+ (src_matrix->type->vector_elements < var->type->vector_elements)) {
- ident.f[0] = 0.0;
- ident.f[1] = 0.0;
- ident.f[2] = 0.0;
- ident.f[3] = 0.0;
-
- ident.f[col] = 1.0;
+ /* If the source matrix has fewer rows, every column of the destination
+ * must be initialized. Otherwise only the columns in the destination
+ * that do not exist in the source must be initialized.
+ */
+ unsigned col =
+ (src_matrix->type->vector_elements < var->type->vector_elements)
+ ? 0 : src_matrix->type->matrix_columns;
+
+ const glsl_type *const col_type = var->type->column_type();
+ for (/* empty */; col < var->type->matrix_columns; col++) {
+ ir_constant_data ident;
+
+ if (!col_type->is_double()) {
+ ident.f[0] = 0.0f;
+ ident.f[1] = 0.0f;
+ ident.f[2] = 0.0f;
+ ident.f[3] = 0.0f;
+ ident.f[col] = 1.0f;
+ } else {
+ ident.d[0] = 0.0;
+ ident.d[1] = 0.0;
+ ident.d[2] = 0.0;
+ ident.d[3] = 0.0;
+ ident.d[col] = 1.0;
+ }
- ir_rvalue *const rhs = new(ctx) ir_constant(col_type, &ident);
+ ir_rvalue *const rhs = new(ctx) ir_constant(col_type, &ident);
- ir_rvalue *const lhs =
- new(ctx) ir_dereference_array(var, new(ctx) ir_constant(col));
+ ir_rvalue *const lhs =
+ new(ctx) ir_dereference_array(var, new(ctx) ir_constant(col));
- ir_instruction *inst = new(ctx) ir_assignment(lhs, rhs, NULL);
- instructions->push_tail(inst);
- }
+ ir_instruction *inst = new(ctx) ir_assignment(lhs, rhs, NULL);
+ instructions->push_tail(inst);
+ }
}
/* Assign columns from the source matrix to the destination matrix.
@@ -1507,51 +1514,51 @@ emit_inline_matrix_constructor(const glsl_type *type,
* generate a temporary and copy the paramter there.
*/
ir_variable *const rhs_var =
- new(ctx) ir_variable(first_param->type, "mat_ctor_mat",
- ir_var_temporary);
+ new(ctx) ir_variable(first_param->type, "mat_ctor_mat",
+ ir_var_temporary);
instructions->push_tail(rhs_var);
ir_dereference *const rhs_var_ref =
- new(ctx) ir_dereference_variable(rhs_var);
+ new(ctx) ir_dereference_variable(rhs_var);
ir_instruction *const inst =
- new(ctx) ir_assignment(rhs_var_ref, first_param, NULL);
+ new(ctx) ir_assignment(rhs_var_ref, first_param, NULL);
instructions->push_tail(inst);
const unsigned last_row = MIN2(src_matrix->type->vector_elements,
- var->type->vector_elements);
+ var->type->vector_elements);
const unsigned last_col = MIN2(src_matrix->type->matrix_columns,
- var->type->matrix_columns);
+ var->type->matrix_columns);
unsigned swiz[4] = { 0, 0, 0, 0 };
for (unsigned i = 1; i < last_row; i++)
- swiz[i] = i;
+ swiz[i] = i;
- const unsigned write_mask = (1U << last_row) - 1;
+ const unsigned write_mask = (1U << last_row) - 1;
for (unsigned i = 0; i < last_col; i++) {
- ir_dereference *const lhs =
- new(ctx) ir_dereference_array(var, new(ctx) ir_constant(i));
- ir_rvalue *const rhs_col =
- new(ctx) ir_dereference_array(rhs_var, new(ctx) ir_constant(i));
-
- /* If one matrix has columns that are smaller than the columns of the
- * other matrix, wrap the column access of the larger with a swizzle
- * so that the LHS and RHS of the assignment have the same size (and
- * therefore have the same type).
- *
- * It would be perfectly valid to unconditionally generate the
- * swizzles, this this will typically result in a more compact IR tree.
- */
- ir_rvalue *rhs;
- if (lhs->type->vector_elements != rhs_col->type->vector_elements) {
- rhs = new(ctx) ir_swizzle(rhs_col, swiz, last_row);
- } else {
- rhs = rhs_col;
- }
+ ir_dereference *const lhs =
+ new(ctx) ir_dereference_array(var, new(ctx) ir_constant(i));
+ ir_rvalue *const rhs_col =
+ new(ctx) ir_dereference_array(rhs_var, new(ctx) ir_constant(i));
+
+ /* If one matrix has columns that are smaller than the columns of the
+ * other matrix, wrap the column access of the larger with a swizzle
+ * so that the LHS and RHS of the assignment have the same size (and
+ * therefore have the same type).
+ *
+ * It would be perfectly valid to unconditionally generate the
+ * swizzles, but this will typically result in a more compact IR tree.
+ */
+ ir_rvalue *rhs;
+ if (lhs->type->vector_elements != rhs_col->type->vector_elements) {
+ rhs = new(ctx) ir_swizzle(rhs_col, swiz, last_row);
+ } else {
+ rhs = rhs_col;
+ }
- ir_instruction *inst =
- new(ctx) ir_assignment(lhs, rhs, NULL, write_mask);
- instructions->push_tail(inst);
+ ir_instruction *inst =
+ new(ctx) ir_assignment(lhs, rhs, NULL, write_mask);
+ instructions->push_tail(inst);
}
} else {
const unsigned cols = type->matrix_columns;
diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
index 9e811661a2e..75abef6a8f9 100644
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -1133,9 +1133,9 @@ do_comparison(void *mem_ctx, int operation, ir_rvalue *op0, ir_rvalue *op1)
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_IMAGE:
case GLSL_TYPE_INTERFACE:
- case GLSL_TYPE_FUNCTION:
case GLSL_TYPE_ATOMIC_UINT:
case GLSL_TYPE_SUBROUTINE:
+ case GLSL_TYPE_FUNCTION:
/* I assume a comparison of a struct containing a sampler just
* ignores the sampler present in the type.
*/
@@ -2268,7 +2268,7 @@ get_type_name_for_precision_qualifier(const glsl_type *type)
type->sampler_array + 2 * type->sampler_shadow;
const unsigned offset = type->base_type == GLSL_TYPE_SAMPLER ? 0 : 4;
assert(type_idx < 4);
- switch (type->sampler_type) {
+ switch (type->sampled_type) {
case GLSL_TYPE_FLOAT:
switch (type->sampler_dimensionality) {
case GLSL_SAMPLER_DIM_1D: {
@@ -2750,6 +2750,17 @@ interpret_interpolation_qualifier(const struct ast_type_qualifier *qual,
"vertex shader inputs or fragment shader outputs",
interpolation_string(interpolation));
}
+ } else if (state->es_shader &&
+ ((mode == ir_var_shader_in &&
+ state->stage != MESA_SHADER_VERTEX) ||
+ (mode == ir_var_shader_out &&
+ state->stage != MESA_SHADER_FRAGMENT))) {
+ /* Section 4.3.9 (Interpolation) of the GLSL ES 3.00 spec says:
+ *
+ * "When no interpolation qualifier is present, smooth interpolation
+ * is used."
+ */
+ interpolation = INTERP_QUALIFIER_SMOOTH;
}
return interpolation;
@@ -2954,7 +2965,7 @@ apply_image_qualifier_to_variable(const struct ast_type_qualifier *qual,
"used on image function parameters");
}
- if (qual->image_base_type != base_type->sampler_type) {
+ if (qual->image_base_type != base_type->sampled_type) {
_mesa_glsl_error(loc, state, "format qualifier doesn't match the "
"base data type of the image");
}
@@ -4679,8 +4690,7 @@ ast_declarator_list::hir(exec_list *instructions,
&& this->type->qualifier.has_interpolation()
&& this->type->qualifier.flags.q.varying) {
- const char *i = this->type->qualifier.interpolation_string();
- assert(i != NULL);
+ const char *i = interpolation_string(var->data.interpolation);
const char *s;
if (this->type->qualifier.flags.q.centroid)
s = "centroid varying";
@@ -4710,9 +4720,7 @@ ast_declarator_list::hir(exec_list *instructions,
if (state->is_version(130, 300)
&& this->type->qualifier.has_interpolation()) {
- const char *i = this->type->qualifier.interpolation_string();
- assert(i != NULL);
-
+ const char *i = interpolation_string(var->data.interpolation);
switch (state->stage) {
case MESA_SHADER_VERTEX:
if (this->type->qualifier.flags.q.in) {
@@ -6259,7 +6267,7 @@ ast_process_struct_or_iface_block_members(exec_list *instructions,
decl_count);
bool first_member = true;
- bool first_member_has_explicit_location;
+ bool first_member_has_explicit_location = false;
unsigned i = 0;
foreach_list_typed (ast_declarator_list, decl_list, link, declarations) {
diff --git a/src/compiler/glsl/ast_type.cpp b/src/compiler/glsl/ast_type.cpp
index e0e331152dd..dcd83efa6ff 100644
--- a/src/compiler/glsl/ast_type.cpp
+++ b/src/compiler/glsl/ast_type.cpp
@@ -102,19 +102,6 @@ ast_type_qualifier::has_auxiliary_storage() const
|| this->flags.q.patch;
}
-const char*
-ast_type_qualifier::interpolation_string() const
-{
- if (this->flags.q.smooth)
- return "smooth";
- else if (this->flags.q.flat)
- return "flat";
- else if (this->flags.q.noperspective)
- return "noperspective";
- else
- return NULL;
-}
-
/**
* This function merges both duplicate identifies within a single layout and
* multiple layout qualifiers on a single variable declaration. The
diff --git a/src/compiler/glsl/builtin_functions.cpp b/src/compiler/glsl/builtin_functions.cpp
index 5512a33f114..bbb237a102c 100644
--- a/src/compiler/glsl/builtin_functions.cpp
+++ b/src/compiler/glsl/builtin_functions.cpp
@@ -448,8 +448,16 @@ shader_image_load_store(const _mesa_glsl_parse_state *state)
static bool
shader_image_atomic(const _mesa_glsl_parse_state *state)
{
- return (state->is_version(420, 0) ||
- state->ARB_shader_image_load_store_enable);
+ return (state->is_version(420, 320) ||
+ state->ARB_shader_image_load_store_enable ||
+ state->OES_shader_image_atomic_enable);
+}
+
+static bool
+shader_image_atomic_exchange_float(const _mesa_glsl_parse_state *state)
+{
+ return (state->is_version(450, 320) ||
+ state->OES_shader_image_atomic_enable);
}
static bool
@@ -577,17 +585,6 @@ private:
unsigned num_arguments,
unsigned flags);
- enum image_function_flags {
- IMAGE_FUNCTION_EMIT_STUB = (1 << 0),
- IMAGE_FUNCTION_RETURNS_VOID = (1 << 1),
- IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE = (1 << 2),
- IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE = (1 << 3),
- IMAGE_FUNCTION_READ_ONLY = (1 << 4),
- IMAGE_FUNCTION_WRITE_ONLY = (1 << 5),
- IMAGE_FUNCTION_AVAIL_ATOMIC = (1 << 6),
- IMAGE_FUNCTION_MS_ONLY = (1 << 7),
- };
-
/**
* Create a new image built-in function for all known image types.
* \p flags is a bitfield of \c image_function_flags flags.
@@ -836,6 +833,18 @@ private:
/** @} */
};
+enum image_function_flags {
+ IMAGE_FUNCTION_EMIT_STUB = (1 << 0),
+ IMAGE_FUNCTION_RETURNS_VOID = (1 << 1),
+ IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE = (1 << 2),
+ IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE = (1 << 3),
+ IMAGE_FUNCTION_READ_ONLY = (1 << 4),
+ IMAGE_FUNCTION_WRITE_ONLY = (1 << 5),
+ IMAGE_FUNCTION_AVAIL_ATOMIC = (1 << 6),
+ IMAGE_FUNCTION_MS_ONLY = (1 << 7),
+ IMAGE_FUNCTION_AVAIL_ATOMIC_EXCHANGE = (1 << 8)
+};
+
} /* anonymous namespace */
/**
@@ -2921,7 +2930,7 @@ builtin_builder::add_image_function(const char *name,
ir_function *f = new(mem_ctx) ir_function(name);
for (unsigned i = 0; i < ARRAY_SIZE(types); ++i) {
- if ((types[i]->sampler_type != GLSL_TYPE_FLOAT ||
+ if ((types[i]->sampled_type != GLSL_TYPE_FLOAT ||
(flags & IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE)) &&
(types[i]->sampler_dimensionality == GLSL_SAMPLER_DIM_MS ||
!(flags & IMAGE_FUNCTION_MS_ONLY)))
@@ -2981,7 +2990,9 @@ builtin_builder::add_image_functions(bool glsl)
add_image_function((glsl ? "imageAtomicExchange" :
"__intrinsic_image_atomic_exchange"),
"__intrinsic_image_atomic_exchange",
- &builtin_builder::_image_prototype, 1, atom_flags);
+ &builtin_builder::_image_prototype, 1,
+ (flags | IMAGE_FUNCTION_AVAIL_ATOMIC_EXCHANGE |
+ IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE));
add_image_function((glsl ? "imageAtomicCompSwap" :
"__intrinsic_image_atomic_comp_swap"),
@@ -5232,13 +5243,28 @@ builtin_builder::_mid3(const glsl_type *type)
return sig;
}
+static builtin_available_predicate
+get_image_available_predicate(const glsl_type *type, unsigned flags)
+{
+ if ((flags & IMAGE_FUNCTION_AVAIL_ATOMIC_EXCHANGE) &&
+ type->sampled_type == GLSL_TYPE_FLOAT)
+ return shader_image_atomic_exchange_float;
+
+ else if (flags & (IMAGE_FUNCTION_AVAIL_ATOMIC_EXCHANGE |
+ IMAGE_FUNCTION_AVAIL_ATOMIC))
+ return shader_image_atomic;
+
+ else
+ return shader_image_load_store;
+}
+
ir_function_signature *
builtin_builder::_image_prototype(const glsl_type *image_type,
unsigned num_arguments,
unsigned flags)
{
const glsl_type *data_type = glsl_type::get_instance(
- image_type->sampler_type,
+ image_type->sampled_type,
(flags & IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE ? 4 : 1),
1);
const glsl_type *ret_type = (flags & IMAGE_FUNCTION_RETURNS_VOID ?
@@ -5249,10 +5275,9 @@ builtin_builder::_image_prototype(const glsl_type *image_type,
ir_variable *coord = in_var(
glsl_type::ivec(image_type->coordinate_components()), "coord");
- const builtin_available_predicate avail =
- (flags & IMAGE_FUNCTION_AVAIL_ATOMIC ? shader_image_atomic :
- shader_image_load_store);
- ir_function_signature *sig = new_sig(ret_type, avail, 2, image, coord);
+ ir_function_signature *sig = new_sig(
+ ret_type, get_image_available_predicate(image_type, flags),
+ 2, image, coord);
/* Sample index for multisample images. */
if (image_type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS)
diff --git a/src/compiler/glsl/builtin_variables.cpp b/src/compiler/glsl/builtin_variables.cpp
index 6db74f1c634..d20fc4a816c 100644
--- a/src/compiler/glsl/builtin_variables.cpp
+++ b/src/compiler/glsl/builtin_variables.cpp
@@ -770,11 +770,16 @@ builtin_variable_generator::generate_constants()
}
if (state->is_version(430, 310) || state->ARB_compute_shader_enable) {
- add_const("gl_MaxComputeAtomicCounterBuffers", MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS);
- add_const("gl_MaxComputeAtomicCounters", MAX_COMPUTE_ATOMIC_COUNTERS);
- add_const("gl_MaxComputeImageUniforms", MAX_COMPUTE_IMAGE_UNIFORMS);
- add_const("gl_MaxComputeTextureImageUnits", MAX_COMPUTE_TEXTURE_IMAGE_UNITS);
- add_const("gl_MaxComputeUniformComponents", MAX_COMPUTE_UNIFORM_COMPONENTS);
+ add_const("gl_MaxComputeAtomicCounterBuffers",
+ state->Const.MaxComputeAtomicCounterBuffers);
+ add_const("gl_MaxComputeAtomicCounters",
+ state->Const.MaxComputeAtomicCounters);
+ add_const("gl_MaxComputeImageUniforms",
+ state->Const.MaxComputeImageUniforms);
+ add_const("gl_MaxComputeTextureImageUnits",
+ state->Const.MaxComputeTextureImageUnits);
+ add_const("gl_MaxComputeUniformComponents",
+ state->Const.MaxComputeUniformComponents);
add_const_ivec3("gl_MaxComputeWorkGroupCount",
state->Const.MaxComputeWorkGroupCount[0],
diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y b/src/compiler/glsl/glcpp/glcpp-parse.y
index 43a1aa94aff..b03e1910758 100644
--- a/src/compiler/glsl/glcpp/glcpp-parse.y
+++ b/src/compiler/glsl/glcpp/glcpp-parse.y
@@ -2096,6 +2096,9 @@ _check_for_reserved_macro_name (glcpp_parser_t *parser, YYLTYPE *loc,
if (strncmp(identifier, "GL_", 3) == 0) {
glcpp_error (loc, parser, "Macro names starting with \"GL_\" are reserved.\n");
}
+ if (strcmp(identifier, "defined") == 0) {
+ glcpp_error (loc, parser, "\"defined\" cannot be used as a macro name");
+ }
}
static int
@@ -2388,6 +2391,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
add_builtin_define(parser, "GL_EXT_blend_func_extended", 1);
if (version >= 310) {
+ if (extensions->ARB_shader_image_load_store)
+ add_builtin_define(parser, "GL_OES_shader_image_atomic", 1);
+
if (extensions->OES_geometry_shader) {
add_builtin_define(parser, "GL_OES_geometry_point_size", 1);
add_builtin_define(parser, "GL_OES_geometry_shader", 1);
diff --git a/src/compiler/glsl/glsl_lexer.ll b/src/compiler/glsl/glsl_lexer.ll
index e59f93e10ef..9704fc7ac4f 100644
--- a/src/compiler/glsl/glsl_lexer.ll
+++ b/src/compiler/glsl/glsl_lexer.ll
@@ -113,11 +113,7 @@ literal_integer(char *text, int len, struct _mesa_glsl_parse_state *state,
if (base == 16)
digits += 2;
-#ifdef _MSC_VER
- unsigned __int64 value = _strtoui64(digits, NULL, base);
-#else
unsigned long long value = strtoull(digits, NULL, base);
-#endif
lval->n = (int)value;
diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp
index 86cf091b4fe..8ccbefc3f71 100644
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ b/src/compiler/glsl/glsl_parser_extras.cpp
@@ -120,6 +120,7 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
this->Const.MaxTessEvaluationAtomicCounters = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxAtomicCounters;
this->Const.MaxGeometryAtomicCounters = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters;
this->Const.MaxFragmentAtomicCounters = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters;
+ this->Const.MaxComputeAtomicCounters = ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicCounters;
this->Const.MaxCombinedAtomicCounters = ctx->Const.MaxCombinedAtomicCounters;
this->Const.MaxAtomicBufferBindings = ctx->Const.MaxAtomicBufferBindings;
this->Const.MaxVertexAtomicCounterBuffers =
@@ -132,6 +133,8 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers;
this->Const.MaxFragmentAtomicCounterBuffers =
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers;
+ this->Const.MaxComputeAtomicCounterBuffers =
+ ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers;
this->Const.MaxCombinedAtomicCounterBuffers =
ctx->Const.MaxCombinedAtomicBuffers;
this->Const.MaxAtomicCounterBufferSize =
@@ -143,6 +146,9 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
for (unsigned i = 0; i < ARRAY_SIZE(this->Const.MaxComputeWorkGroupSize); i++)
this->Const.MaxComputeWorkGroupSize[i] = ctx->Const.MaxComputeWorkGroupSize[i];
+ this->Const.MaxComputeTextureImageUnits = ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
+ this->Const.MaxComputeUniformComponents = ctx->Const.Program[MESA_SHADER_COMPUTE].MaxUniformComponents;
+
this->Const.MaxImageUnits = ctx->Const.MaxImageUnits;
this->Const.MaxCombinedShaderOutputResources = ctx->Const.MaxCombinedShaderOutputResources;
this->Const.MaxImageSamples = ctx->Const.MaxImageSamples;
@@ -151,6 +157,7 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
this->Const.MaxTessEvaluationImageUniforms = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxImageUniforms;
this->Const.MaxGeometryImageUniforms = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxImageUniforms;
this->Const.MaxFragmentImageUniforms = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms;
+ this->Const.MaxComputeImageUniforms = ctx->Const.Program[MESA_SHADER_COMPUTE].MaxImageUniforms;
this->Const.MaxCombinedImageUniforms = ctx->Const.MaxCombinedImageUniforms;
/* ARB_viewport_array */
@@ -601,6 +608,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
EXT(OES_EGL_image_external, false, true, OES_EGL_image_external),
EXT(OES_geometry_point_size, false, true, OES_geometry_shader),
EXT(OES_geometry_shader, false, true, OES_geometry_shader),
+ EXT(OES_shader_image_atomic, false, true, ARB_shader_image_load_store),
EXT(OES_standard_derivatives, false, true, OES_standard_derivatives),
EXT(OES_texture_3D, false, true, dummy_true),
EXT(OES_texture_storage_multisample_2d_array, false, true, ARB_texture_multisample),
@@ -946,27 +954,11 @@ _mesa_ast_process_interface_block(YYLTYPE *locp,
"the interface block");
}
- /* From GLSL ES 3.0, chapter 4.3.7 "Interface Blocks":
- *
- * "GLSL ES 3.0 does not support interface blocks for shader inputs or
- * outputs."
- *
- * And from GLSL ES 3.0, chapter 4.6.1 "The invariant qualifier":.
- *
- * "Only variables output from a shader can be candidates for
- * invariance."
- *
- * From GLSL 4.40 and GLSL 1.50, section "Interface Blocks":
- *
- * "If optional qualifiers are used, they can include interpolation
- * qualifiers, auxiliary storage qualifiers, and storage qualifiers
- * and they must declare an input, output, or uniform member
- * consistent with the interface qualifier of the block"
- */
- if (qualifier.flags.q.invariant)
+ if (!(q.flags.q.in || q.flags.q.out) && qualifier.flags.q.invariant)
_mesa_glsl_error(locp, state,
- "invariant qualifiers cannot be used "
- "with interface blocks members");
+ "invariant qualifiers can be used only "
+ "in interface block members for shader "
+ "inputs or outputs");
}
}
diff --git a/src/compiler/glsl/glsl_parser_extras.h b/src/compiler/glsl/glsl_parser_extras.h
index 4dacc2ac62b..86ec057f288 100644
--- a/src/compiler/glsl/glsl_parser_extras.h
+++ b/src/compiler/glsl/glsl_parser_extras.h
@@ -422,6 +422,11 @@ struct _mesa_glsl_parse_state {
unsigned MaxAtomicCounterBufferSize;
/* ARB_compute_shader */
+ unsigned MaxComputeAtomicCounterBuffers;
+ unsigned MaxComputeAtomicCounters;
+ unsigned MaxComputeImageUniforms;
+ unsigned MaxComputeTextureImageUnits;
+ unsigned MaxComputeUniformComponents;
unsigned MaxComputeWorkGroupCount[3];
unsigned MaxComputeWorkGroupSize[3];
@@ -588,6 +593,8 @@ struct _mesa_glsl_parse_state {
bool OES_geometry_point_size_warn;
bool OES_geometry_shader_enable;
bool OES_geometry_shader_warn;
+ bool OES_shader_image_atomic_enable;
+ bool OES_shader_image_atomic_warn;
bool OES_standard_derivatives_enable;
bool OES_standard_derivatives_warn;
bool OES_texture_3D_enable;
diff --git a/src/compiler/glsl/ir.cpp b/src/compiler/glsl/ir.cpp
index 5debca32411..750f61744e7 100644
--- a/src/compiler/glsl/ir.cpp
+++ b/src/compiler/glsl/ir.cpp
@@ -1442,7 +1442,7 @@ ir_texture::set_sampler(ir_dereference *sampler, const glsl_type *type)
assert(sampler->type->base_type == GLSL_TYPE_SAMPLER);
assert(sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS);
} else {
- assert(sampler->type->sampler_type == (int) type->base_type);
+ assert(sampler->type->sampled_type == (int) type->base_type);
if (sampler->type->sampler_shadow)
assert(type->vector_elements == 4 || type->vector_elements == 1);
else
@@ -1696,21 +1696,6 @@ interpolation_string(unsigned interpolation)
return "";
}
-
-glsl_interp_qualifier
-ir_variable::determine_interpolation_mode(bool flat_shade)
-{
- if (this->data.interpolation != INTERP_QUALIFIER_NONE)
- return (glsl_interp_qualifier) this->data.interpolation;
- int location = this->data.location;
- bool is_gl_Color =
- location == VARYING_SLOT_COL0 || location == VARYING_SLOT_COL1;
- if (flat_shade && is_gl_Color)
- return INTERP_QUALIFIER_FLAT;
- else
- return INTERP_QUALIFIER_SMOOTH;
-}
-
const char *const ir_variable::warn_extension_table[] = {
"",
"GL_ARB_shader_stencil_export",
diff --git a/src/compiler/glsl/ir.h b/src/compiler/glsl/ir.h
index bf9b7caffae..93c893d36fe 100644
--- a/src/compiler/glsl/ir.h
+++ b/src/compiler/glsl/ir.h
@@ -432,17 +432,6 @@ public:
/**
- * Determine how this variable should be interpolated based on its
- * interpolation qualifier (if present), whether it is gl_Color or
- * gl_SecondaryColor, and whether flatshading is enabled in the current GL
- * state.
- *
- * The return value will always be either INTERP_QUALIFIER_SMOOTH,
- * INTERP_QUALIFIER_NOPERSPECTIVE, or INTERP_QUALIFIER_FLAT.
- */
- glsl_interp_qualifier determine_interpolation_mode(bool flat_shade);
-
- /**
* Determine whether or not a variable is part of a uniform or
* shader storage block.
*/
diff --git a/src/compiler/glsl/ir_clone.cpp b/src/compiler/glsl/ir_clone.cpp
index b32ec17f1af..43ffffb0a38 100644
--- a/src/compiler/glsl/ir_clone.cpp
+++ b/src/compiler/glsl/ir_clone.cpp
@@ -366,7 +366,6 @@ ir_constant::clone(void *mem_ctx, struct hash_table *ht) const
return c;
}
- case GLSL_TYPE_FUNCTION:
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_IMAGE:
case GLSL_TYPE_ATOMIC_UINT:
@@ -374,6 +373,7 @@ ir_constant::clone(void *mem_ctx, struct hash_table *ht) const
case GLSL_TYPE_ERROR:
case GLSL_TYPE_SUBROUTINE:
case GLSL_TYPE_INTERFACE:
+ case GLSL_TYPE_FUNCTION:
assert(!"Should not get here.");
break;
}
diff --git a/src/compiler/glsl/link_uniform_initializers.cpp b/src/compiler/glsl/link_uniform_initializers.cpp
index cdc1d3ac7be..3609f81771e 100644
--- a/src/compiler/glsl/link_uniform_initializers.cpp
+++ b/src/compiler/glsl/link_uniform_initializers.cpp
@@ -88,9 +88,9 @@ copy_constant_to_storage(union gl_constant_value *storage,
case GLSL_TYPE_IMAGE:
case GLSL_TYPE_ATOMIC_UINT:
case GLSL_TYPE_INTERFACE:
- case GLSL_TYPE_FUNCTION:
case GLSL_TYPE_VOID:
case GLSL_TYPE_SUBROUTINE:
+ case GLSL_TYPE_FUNCTION:
case GLSL_TYPE_ERROR:
/* All other types should have already been filtered by other
* paths in the caller.
diff --git a/src/compiler/glsl/link_uniforms.cpp b/src/compiler/glsl/link_uniforms.cpp
index 7072c16cb28..deaba94df1c 100644
--- a/src/compiler/glsl/link_uniforms.cpp
+++ b/src/compiler/glsl/link_uniforms.cpp
@@ -649,15 +649,15 @@ private:
current_var->data.image_write_only ? GL_WRITE_ONLY :
GL_READ_WRITE);
- for (unsigned j = 0; j < MAX2(1, uniform->array_elements); ++j)
- prog->_LinkedShaders[shader_type]->
- ImageAccess[this->next_image + j] = access;
+ const unsigned first = this->next_image;
/* Increment the image index by 1 for non-arrays and by the
* number of array elements for arrays.
*/
this->next_image += MAX2(1, uniform->array_elements);
+ for (unsigned i = first; i < MIN2(next_image, MAX_IMAGE_UNIFORMS); i++)
+ prog->_LinkedShaders[shader_type]->ImageAccess[i] = access;
}
}
@@ -1038,9 +1038,43 @@ assign_hidden_uniform_slot_id(const char *name, unsigned hidden_id,
uniform_size->map->put(hidden_uniform_start + hidden_id, name);
}
+/**
+ * Search the list of empty blocks for one that fits the current uniform.
+ * Returns the start location of the chosen block, or -1 if none fits.
+ */
+static int
+find_empty_block(struct gl_shader_program *prog,
+ struct gl_uniform_storage *uniform)
+{
+ const unsigned entries = MAX2(1, uniform->array_elements);
+
+ foreach_list_typed(struct empty_uniform_block, block, link,
+ &prog->EmptyUniformLocations) {
+ /* Found a block with enough slots to fit the uniform */
+ if (block->slots == entries) {
+ unsigned start = block->start;
+ exec_node_remove(&block->link);
+ ralloc_free(block);
+
+ return start;
+ /* Found a block with more slots than needed. It can still be used. */
+ } else if (block->slots > entries) {
+ unsigned start = block->start;
+ block->start += entries;
+ block->slots -= entries;
+
+ return start;
+ }
+ }
+
+ return -1;
+}
+
void
link_assign_uniform_locations(struct gl_shader_program *prog,
- unsigned int boolean_true)
+ unsigned int boolean_true,
+ unsigned int num_explicit_uniform_locs,
+ unsigned int max_uniform_locs)
{
ralloc_free(prog->UniformStorage);
prog->UniformStorage = NULL;
@@ -1131,6 +1165,9 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
parcel_out_uniform_storage parcel(prog, prog->UniformHash, uniforms, data);
+ unsigned total_entries = num_explicit_uniform_locs;
+ unsigned empty_locs = prog->NumUniformRemapTable - num_explicit_uniform_locs;
+
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
if (prog->_LinkedShaders[i] == NULL)
continue;
@@ -1194,21 +1231,44 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
/* how many new entries for this uniform? */
const unsigned entries = MAX2(1, uniforms[i].array_elements);
- /* resize remap table to fit new entries */
- prog->UniformRemapTable =
- reralloc(prog,
- prog->UniformRemapTable,
- gl_uniform_storage *,
- prog->NumUniformRemapTable + entries);
+ /* Look for an empty block of UniformRemapTable slots that fits this uniform. */
+ int chosen_location = -1;
+
+ if (empty_locs)
+ chosen_location = find_empty_block(prog, &uniforms[i]);
+
+ /* Add new entries to the total amount of entries. */
+ total_entries += entries;
+
+ if (chosen_location != -1) {
+ empty_locs -= entries;
+ } else {
+ chosen_location = prog->NumUniformRemapTable;
+
+ /* resize remap table to fit new entries */
+ prog->UniformRemapTable =
+ reralloc(prog,
+ prog->UniformRemapTable,
+ gl_uniform_storage *,
+ prog->NumUniformRemapTable + entries);
+ prog->NumUniformRemapTable += entries;
+ }
/* set pointers for this uniform */
for (unsigned j = 0; j < entries; j++)
- prog->UniformRemapTable[prog->NumUniformRemapTable+j] = &uniforms[i];
+ prog->UniformRemapTable[chosen_location + j] = &uniforms[i];
/* set the base location in remap table for the uniform */
- uniforms[i].remap_location = prog->NumUniformRemapTable;
+ uniforms[i].remap_location = chosen_location;
+ }
+
+ /* Verify that the total number of entries for explicit and implicit
+ * locations does not exceed MAX_UNIFORM_LOCATIONS.
+ */
- prog->NumUniformRemapTable += entries;
+ if (total_entries > max_uniform_locs) {
+ linker_error(prog, "count of uniform locations > MAX_UNIFORM_LOCATIONS"
+ "(%u > %u)", total_entries, max_uniform_locs);
}
/* Reserve all the explicit locations of the active subroutine uniforms. */
diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp
index 590de174507..05cc1a2b7f8 100644
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -1739,22 +1739,7 @@ assign_varying_locations(struct gl_context *ctx,
if (var && var->data.mode == ir_var_shader_in &&
var->data.is_unmatched_generic_inout) {
- if (prog->IsES) {
- /*
- * On Page 91 (Page 97 of the PDF) of the GLSL ES 1.0 spec:
- *
- * If the vertex shader declares but doesn't write to a
- * varying and the fragment shader declares and reads it,
- * is this an error?
- *
- * RESOLUTION: No.
- */
- linker_warning(prog, "%s shader varying %s not written "
- "by %s shader\n.",
- _mesa_shader_stage_to_string(consumer->Stage),
- var->name,
- _mesa_shader_stage_to_string(producer->Stage));
- } else if (prog->Version <= 120) {
+ if (!prog->IsES && prog->Version <= 120) {
/* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec:
*
* Only those varying variables used (i.e. read) in
@@ -1772,6 +1757,12 @@ assign_varying_locations(struct gl_context *ctx,
_mesa_shader_stage_to_string(consumer->Stage),
var->name,
_mesa_shader_stage_to_string(producer->Stage));
+ } else {
+ linker_warning(prog, "%s shader varying %s not written "
+ "by %s shader\n.",
+ _mesa_shader_stage_to_string(consumer->Stage),
+ var->name,
+ _mesa_shader_stage_to_string(producer->Stage));
}
}
}
diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index bad1c1742b7..5326bfd4d68 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -3008,12 +3008,13 @@ check_image_resources(struct gl_context *ctx, struct gl_shader_program *prog)
* for a variable, checks for overlaps between other uniforms using explicit
* locations.
*/
-static bool
+static int
reserve_explicit_locations(struct gl_shader_program *prog,
string_to_uint_map *map, ir_variable *var)
{
unsigned slots = var->type->uniform_locations();
unsigned max_loc = var->data.location + slots - 1;
+ unsigned return_value = slots;
/* Resize remap table if locations do not fit in the current one. */
if (max_loc + 1 > prog->NumUniformRemapTable) {
@@ -3024,7 +3025,7 @@ reserve_explicit_locations(struct gl_shader_program *prog,
if (!prog->UniformRemapTable) {
linker_error(prog, "Out of memory during linking.\n");
- return false;
+ return -1;
}
/* Initialize allocated space. */
@@ -3042,8 +3043,10 @@ reserve_explicit_locations(struct gl_shader_program *prog,
/* Possibly same uniform from a different stage, this is ok. */
unsigned hash_loc;
- if (map->get(hash_loc, var->name) && hash_loc == loc - i)
- continue;
+ if (map->get(hash_loc, var->name) && hash_loc == loc - i) {
+ return_value = 0;
+ continue;
+ }
/* ARB_explicit_uniform_location specification states:
*
@@ -3055,7 +3058,7 @@ reserve_explicit_locations(struct gl_shader_program *prog,
"location qualifier for uniform %s overlaps "
"previously used location\n",
var->name);
- return false;
+ return -1;
}
/* Initialize location as inactive before optimization
@@ -3067,7 +3070,7 @@ reserve_explicit_locations(struct gl_shader_program *prog,
/* Note, base location used for arrays. */
map->put(var->data.location, var->name);
- return true;
+ return return_value;
}
static bool
@@ -3128,12 +3131,12 @@ reserve_subroutine_explicit_locations(struct gl_shader_program *prog,
* any optimizations happen to handle also inactive uniforms and
* inactive array elements that may get trimmed away.
*/
-static void
+static int
check_explicit_uniform_locations(struct gl_context *ctx,
struct gl_shader_program *prog)
{
if (!ctx->Extensions.ARB_explicit_uniform_location)
- return;
+ return -1;
/* This map is used to detect if overlapping explicit locations
* occur with the same uniform (from different stage) or a different one.
@@ -3142,7 +3145,7 @@ check_explicit_uniform_locations(struct gl_context *ctx,
if (!uniform_map) {
linker_error(prog, "Out of memory during linking.\n");
- return;
+ return -1;
}
unsigned entries_total = 0;
@@ -3157,31 +3160,47 @@ check_explicit_uniform_locations(struct gl_context *ctx,
if (!var || var->data.mode != ir_var_uniform)
continue;
- entries_total += var->type->uniform_locations();
-
if (var->data.explicit_location) {
- bool ret;
+ bool ret = false;
if (var->type->without_array()->is_subroutine())
ret = reserve_subroutine_explicit_locations(prog, sh, var);
- else
- ret = reserve_explicit_locations(prog, uniform_map, var);
+ else {
+ int slots = reserve_explicit_locations(prog, uniform_map,
+ var);
+ if (slots != -1) {
+ ret = true;
+ entries_total += slots;
+ }
+ }
if (!ret) {
delete uniform_map;
- return;
+ return -1;
}
}
}
}
- /* Verify that total amount of entries for explicit and implicit locations
- * is less than MAX_UNIFORM_LOCATIONS.
- */
- if (entries_total >= ctx->Const.MaxUserAssignableUniformLocations) {
- linker_error(prog, "count of uniform locations >= MAX_UNIFORM_LOCATIONS"
- "(%u >= %u)", entries_total,
- ctx->Const.MaxUserAssignableUniformLocations);
+ exec_list_make_empty(&prog->EmptyUniformLocations);
+ struct empty_uniform_block *current_block = NULL;
+
+ for (unsigned i = 0; i < prog->NumUniformRemapTable; i++) {
+ /* We found empty space in UniformRemapTable. */
+ if (prog->UniformRemapTable[i] == NULL) {
+ /* We've found the beginning of a new contiguous block of empty slots */
+ if (!current_block || current_block->start + current_block->slots != i) {
+ current_block = rzalloc(prog, struct empty_uniform_block);
+ current_block->start = i;
+ exec_list_push_tail(&prog->EmptyUniformLocations,
+ &current_block->link);
+ }
+
+ /* The current block continues, so we simply increment its slots */
+ current_block->slots++;
+ }
}
+
delete uniform_map;
+ return entries_total;
}
static bool
@@ -4129,6 +4148,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
tfeedback_decl *tfeedback_decls = NULL;
unsigned num_tfeedback_decls = prog->TransformFeedback.NumVarying;
+ unsigned int num_explicit_uniform_locs = 0;
void *mem_ctx = ralloc_context(NULL); // temporary linker context
@@ -4310,7 +4330,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
last = i;
}
- check_explicit_uniform_locations(ctx, prog);
+ num_explicit_uniform_locs = check_explicit_uniform_locations(ctx, prog);
link_assign_subroutine_types(prog);
if (!prog->LinkStatus)
@@ -4541,7 +4561,9 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
goto done;
update_array_sizes(prog);
- link_assign_uniform_locations(prog, ctx->Const.UniformBooleanTrue);
+ link_assign_uniform_locations(prog, ctx->Const.UniformBooleanTrue,
+ num_explicit_uniform_locs,
+ ctx->Const.MaxUserAssignableUniformLocations);
link_assign_atomic_counter_resources(ctx, prog);
store_fragdepth_layout(prog);
diff --git a/src/compiler/glsl/linker.h b/src/compiler/glsl/linker.h
index c80be1c7e22..a60bb6ed087 100644
--- a/src/compiler/glsl/linker.h
+++ b/src/compiler/glsl/linker.h
@@ -35,7 +35,9 @@ link_invalidate_variable_locations(exec_list *ir);
extern void
link_assign_uniform_locations(struct gl_shader_program *prog,
- unsigned int boolean_true);
+ unsigned int boolean_true,
+ unsigned int num_explicit_uniform_locs,
+ unsigned int max_uniform_locs);
extern void
link_set_uniform_initializers(struct gl_shader_program *prog,
@@ -202,4 +204,17 @@ linker_error(gl_shader_program *prog, const char *fmt, ...);
void
linker_warning(gl_shader_program *prog, const char *fmt, ...);
+/**
+ * Sometimes there are empty slots left over in UniformRemapTable after we
+ * allocate slots to explicit locations. This struct represents a single
+ * contiguous block of empty slots in UniformRemapTable.
+ */
+struct empty_uniform_block {
+ struct exec_node link;
+ /* The start location of the block */
+ unsigned start;
+ /* The number of slots in the block */
+ unsigned slots;
+};
+
#endif /* GLSL_LINKER_H */
diff --git a/src/compiler/glsl/lower_discard_flow.cpp b/src/compiler/glsl/lower_discard_flow.cpp
index 9d0a56b230d..9e3a7c05583 100644
--- a/src/compiler/glsl/lower_discard_flow.cpp
+++ b/src/compiler/glsl/lower_discard_flow.cpp
@@ -62,8 +62,8 @@ public:
{
}
+ ir_visitor_status visit(ir_loop_jump *ir);
ir_visitor_status visit_enter(ir_discard *ir);
- ir_visitor_status visit_enter(ir_loop_jump *ir);
ir_visitor_status visit_enter(ir_loop *ir);
ir_visitor_status visit_enter(ir_function_signature *ir);
@@ -76,7 +76,7 @@ public:
} /* anonymous namespace */
ir_visitor_status
-lower_discard_flow_visitor::visit_enter(ir_loop_jump *ir)
+lower_discard_flow_visitor::visit(ir_loop_jump *ir)
{
if (ir->mode != ir_loop_jump::jump_continue)
return visit_continue;
diff --git a/src/compiler/glsl/main.cpp b/src/compiler/glsl/main.cpp
index df93a013ede..d2535758e1a 100644
--- a/src/compiler/glsl/main.cpp
+++ b/src/compiler/glsl/main.cpp
@@ -58,10 +58,16 @@ initialize_context(struct gl_context *ctx, gl_api api)
ctx->Const.MaxComputeWorkGroupSize[1] = 1024;
ctx->Const.MaxComputeWorkGroupSize[2] = 64;
ctx->Const.MaxComputeWorkGroupInvocations = 1024;
+ ctx->Const.MaxComputeSharedMemorySize = 32768;
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 16;
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxUniformComponents = 1024;
+ ctx->Const.Program[MESA_SHADER_COMPUTE].MaxCombinedUniformComponents = 1024;
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxInputComponents = 0; /* not used */
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxOutputComponents = 0; /* not used */
+ ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers = 8;
+ ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicCounters = 8;
+ ctx->Const.Program[MESA_SHADER_COMPUTE].MaxImageUniforms = 8;
+ ctx->Const.Program[MESA_SHADER_COMPUTE].MaxUniformBlocks = 12;
switch (ctx->Const.GLSLVersion) {
case 100:
@@ -77,12 +83,14 @@ initialize_context(struct gl_context *ctx, gl_api api)
ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 8;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 0;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 128 * 4;
+ ctx->Const.Program[MESA_SHADER_VERTEX].MaxCombinedUniformComponents = 128 * 4;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */
ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 32;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits =
ctx->Const.MaxCombinedTextureImageUnits;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 16 * 4;
+ ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxCombinedUniformComponents = 16 * 4;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents =
ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */
@@ -103,12 +111,14 @@ initialize_context(struct gl_context *ctx, gl_api api)
ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 0;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 512;
+ ctx->Const.Program[MESA_SHADER_VERTEX].MaxCombinedUniformComponents = 512;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */
ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 32;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits =
ctx->Const.MaxCombinedTextureImageUnits;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 64;
+ ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxCombinedUniformComponents = 64;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents =
ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */
@@ -129,11 +139,13 @@ initialize_context(struct gl_context *ctx, gl_api api)
ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 16;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 1024;
+ ctx->Const.Program[MESA_SHADER_VERTEX].MaxCombinedUniformComponents = 1024;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */
ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 64;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = 16;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 1024;
+ ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxCombinedUniformComponents = 1024;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents =
ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */
@@ -153,17 +165,20 @@ initialize_context(struct gl_context *ctx, gl_api api)
ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 16;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 1024;
+ ctx->Const.Program[MESA_SHADER_VERTEX].MaxCombinedUniformComponents = 1024;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */
ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 64;
ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 16;
ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxUniformComponents = 1024;
+ ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxCombinedUniformComponents = 1024;
ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents =
ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents;
ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = 16;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 1024;
+ ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxCombinedUniformComponents = 1024;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents =
ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */
@@ -191,11 +206,13 @@ initialize_context(struct gl_context *ctx, gl_api api)
ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 16;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 1024;
+ ctx->Const.Program[MESA_SHADER_VERTEX].MaxCombinedUniformComponents = 1024;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */
ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 16 * 4;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = 16;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 224;
+ ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxCombinedUniformComponents = 224;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 15 * 4;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */
diff --git a/src/compiler/glsl/tests/sampler_types_test.cpp b/src/compiler/glsl/tests/sampler_types_test.cpp
index 04dd65e6e8d..ef03158bba9 100644
--- a/src/compiler/glsl/tests/sampler_types_test.cpp
+++ b/src/compiler/glsl/tests/sampler_types_test.cpp
@@ -43,7 +43,7 @@ TEST(sampler_types, TYPE) \
const glsl_type *type = glsl_type::TYPE##_type; \
EXPECT_EQ(GLSL_TYPE_SAMPLER, type->base_type); \
EXPECT_EQ(DIM, type->sampler_dimensionality); \
- EXPECT_EQ(DATA_TYPE, type->sampler_type); \
+ EXPECT_EQ(DATA_TYPE, type->sampled_type); \
ARR; \
SHAD; \
EXPECT_EQ(COMPS, type->coordinate_components()); \
diff --git a/src/compiler/glsl/tests/uniform_initializer_utils.cpp b/src/compiler/glsl/tests/uniform_initializer_utils.cpp
index 5006387036f..ec64be18cb3 100644
--- a/src/compiler/glsl/tests/uniform_initializer_utils.cpp
+++ b/src/compiler/glsl/tests/uniform_initializer_utils.cpp
@@ -103,6 +103,7 @@ generate_data_element(void *mem_ctx, const glsl_type *type,
case GLSL_TYPE_ERROR:
case GLSL_TYPE_INTERFACE:
case GLSL_TYPE_SUBROUTINE:
+ case GLSL_TYPE_FUNCTION:
ASSERT_TRUE(false);
break;
}
@@ -136,6 +137,7 @@ generate_data_element(void *mem_ctx, const glsl_type *type,
case GLSL_TYPE_ERROR:
case GLSL_TYPE_INTERFACE:
case GLSL_TYPE_SUBROUTINE:
+ case GLSL_TYPE_FUNCTION:
ASSERT_TRUE(false);
break;
}
@@ -241,6 +243,7 @@ verify_data(gl_constant_value *storage, unsigned storage_array_size,
case GLSL_TYPE_ERROR:
case GLSL_TYPE_INTERFACE:
case GLSL_TYPE_SUBROUTINE:
+ case GLSL_TYPE_FUNCTION:
ASSERT_TRUE(false);
break;
}
diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp
index d2eaec173b3..c549230a83c 100644
--- a/src/compiler/glsl_types.cpp
+++ b/src/compiler/glsl_types.cpp
@@ -51,7 +51,7 @@ glsl_type::glsl_type(GLenum gl_type,
gl_type(gl_type),
base_type(base_type),
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
- sampler_type(0), interface_packing(0),
+ sampled_type(0), interface_packing(0),
vector_elements(vector_elements), matrix_columns(matrix_columns),
length(0)
{
@@ -75,7 +75,7 @@ glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type,
gl_type(gl_type),
base_type(base_type),
sampler_dimensionality(dim), sampler_shadow(shadow),
- sampler_array(array), sampler_type(type), interface_packing(0),
+ sampler_array(array), sampled_type(type), interface_packing(0),
length(0)
{
mtx_lock(&glsl_type::mutex);
@@ -101,7 +101,7 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
gl_type(0),
base_type(GLSL_TYPE_STRUCT),
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
- sampler_type(0), interface_packing(0),
+ sampled_type(0), interface_packing(0),
vector_elements(0), matrix_columns(0),
length(num_fields)
{
@@ -141,7 +141,7 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
gl_type(0),
base_type(GLSL_TYPE_INTERFACE),
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
- sampler_type(0), interface_packing((unsigned) packing),
+ sampled_type(0), interface_packing((unsigned) packing),
vector_elements(0), matrix_columns(0),
length(num_fields)
{
@@ -180,7 +180,7 @@ glsl_type::glsl_type(const glsl_type *return_type,
gl_type(0),
base_type(GLSL_TYPE_FUNCTION),
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
- sampler_type(0), interface_packing(0),
+ sampled_type(0), interface_packing(0),
vector_elements(0), matrix_columns(0),
length(num_params)
{
@@ -212,7 +212,7 @@ glsl_type::glsl_type(const char *subroutine_name) :
gl_type(0),
base_type(GLSL_TYPE_SUBROUTINE),
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
- sampler_type(0), interface_packing(0),
+ sampled_type(0), interface_packing(0),
vector_elements(1), matrix_columns(1),
length(0)
{
@@ -428,7 +428,7 @@ _mesa_glsl_release_types(void)
glsl_type::glsl_type(const glsl_type *array, unsigned length) :
base_type(GLSL_TYPE_ARRAY),
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
- sampler_type(0), interface_packing(0),
+ sampled_type(0), interface_packing(0),
vector_elements(0), matrix_columns(0),
length(length), name(NULL)
{
diff --git a/src/compiler/glsl_types.h b/src/compiler/glsl_types.h
index 5965cb2eedb..2f612d8857d 100644
--- a/src/compiler/glsl_types.h
+++ b/src/compiler/glsl_types.h
@@ -56,11 +56,11 @@ enum glsl_base_type {
GLSL_TYPE_IMAGE,
GLSL_TYPE_ATOMIC_UINT,
GLSL_TYPE_STRUCT,
- GLSL_TYPE_FUNCTION,
GLSL_TYPE_INTERFACE,
GLSL_TYPE_ARRAY,
GLSL_TYPE_VOID,
GLSL_TYPE_SUBROUTINE,
+ GLSL_TYPE_FUNCTION,
GLSL_TYPE_ERROR
};
@@ -122,7 +122,7 @@ struct glsl_type {
unsigned sampler_dimensionality:3; /**< \see glsl_sampler_dim */
unsigned sampler_shadow:1;
unsigned sampler_array:1;
- unsigned sampler_type:2; /**< Type of data returned using this
+ unsigned sampled_type:2; /**< Type of data returned using this
* sampler or image. Only \c
* GLSL_TYPE_FLOAT, \c GLSL_TYPE_INT,
* and \c GLSL_TYPE_UINT are valid.
diff --git a/src/compiler/nir_types.cpp b/src/compiler/nir_types.cpp
index 00703fe6f52..70e9cd397fc 100644
--- a/src/compiler/nir_types.cpp
+++ b/src/compiler/nir_types.cpp
@@ -148,7 +148,7 @@ glsl_base_type
glsl_get_sampler_result_type(const struct glsl_type *type)
{
assert(glsl_type_is_sampler(type) || glsl_type_is_image(type));
- return (glsl_base_type)type->sampler_type;
+ return (glsl_base_type)type->sampled_type;
}
unsigned
@@ -315,6 +315,12 @@ glsl_sampler_type(enum glsl_sampler_dim dim, bool is_shadow, bool is_array,
}
const struct glsl_type *
+glsl_bare_sampler_type()
+{
+ return glsl_type::sampler_type;
+}
+
+const struct glsl_type *
glsl_image_type(enum glsl_sampler_dim dim, bool is_array,
enum glsl_base_type base_type)
{
@@ -331,6 +337,7 @@ glsl_function_type(const glsl_type *return_type,
const glsl_type *
glsl_transposed_type(const struct glsl_type *type)
{
+ assert(glsl_type_is_matrix(type));
return glsl_type::get_instance(type->base_type, type->matrix_columns,
type->vector_elements);
}
diff --git a/src/compiler/nir_types.h b/src/compiler/nir_types.h
index 4ef0dcf9a31..d92605bf4fb 100644
--- a/src/compiler/nir_types.h
+++ b/src/compiler/nir_types.h
@@ -113,6 +113,7 @@ const struct glsl_type *glsl_struct_type(const struct glsl_struct_field *fields,
const struct glsl_type *glsl_sampler_type(enum glsl_sampler_dim dim,
bool is_shadow, bool is_array,
enum glsl_base_type base_type);
+const struct glsl_type *glsl_bare_sampler_type();
const struct glsl_type *glsl_image_type(enum glsl_sampler_dim dim,
bool is_array,
enum glsl_base_type base_type);
diff --git a/src/egl/Android.mk b/src/egl/Android.mk
index ebd67af34cc..cf7125145ca 100644
--- a/src/egl/Android.mk
+++ b/src/egl/Android.mk
@@ -44,9 +44,8 @@ LOCAL_CFLAGS := \
-DHAVE_ANDROID_PLATFORM
ifeq ($(MESA_LOLLIPOP_BUILD),true)
-LOCAL_CFLAGS_arm := -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\"
-LOCAL_CFLAGS_x86 := -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\"
-LOCAL_CFLAGS_x86_64 := -DDEFAULT_DRIVER_DIR=\"/system/lib64/dri\"
+LOCAL_CFLAGS_32 := -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\"
+LOCAL_CFLAGS_64 := -DDEFAULT_DRIVER_DIR=\"/system/lib64/dri\"
else
LOCAL_CFLAGS += -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\"
endif
diff --git a/src/egl/drivers/dri2/platform_android.c b/src/egl/drivers/dri2/platform_android.c
index 8f3abcb9867..7d546650272 100644
--- a/src/egl/drivers/dri2/platform_android.c
+++ b/src/egl/drivers/dri2/platform_android.c
@@ -532,7 +532,12 @@ droid_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *dpy)
{ HAL_PIXEL_FORMAT_RGB_888, { 0xff, 0xff00, 0xff0000, 0x0 } },
{ HAL_PIXEL_FORMAT_RGB_565, { 0xf800, 0x7e0, 0x1f, 0x0 } },
{ HAL_PIXEL_FORMAT_BGRA_8888, { 0xff0000, 0xff00, 0xff, 0xff000000 } },
- { 0, 0, { 0, 0, 0, 0 } }
+ { 0, { 0, 0, 0, 0 } }
+ };
+ EGLint config_attrs[] = {
+ EGL_NATIVE_VISUAL_ID, 0,
+ EGL_NATIVE_VISUAL_TYPE, 0,
+ EGL_NONE
};
int count, i, j;
@@ -540,6 +545,9 @@ droid_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *dpy)
for (i = 0; visuals[i].format; i++) {
int format_count = 0;
+ config_attrs[1] = visuals[i].format;
+ config_attrs[3] = visuals[i].format;
+
for (j = 0; dri2_dpy->driver_configs[j]; j++) {
const EGLint surface_type = EGL_WINDOW_BIT | EGL_PBUFFER_BIT;
struct dri2_egl_config *dri2_conf;
@@ -553,10 +561,8 @@ droid_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *dpy)
continue;
dri2_conf = dri2_add_config(dpy, dri2_dpy->driver_configs[j],
- count + 1, surface_type, NULL, visuals[i].rgba_masks);
+ count + 1, surface_type, config_attrs, visuals[i].rgba_masks);
if (dri2_conf) {
- dri2_conf->base.NativeVisualID = visuals[i].format;
- dri2_conf->base.NativeVisualType = visuals[i].format;
count++;
format_count++;
}
diff --git a/src/egl/drivers/dri2/platform_x11.c b/src/egl/drivers/dri2/platform_x11.c
index 08cbf2d8393..420f567651c 100644
--- a/src/egl/drivers/dri2/platform_x11.c
+++ b/src/egl/drivers/dri2/platform_x11.c
@@ -472,6 +472,8 @@ dri2_x11_get_buffers(__DRIdrawable * driDrawable,
dri2_surf->drawable,
count, count, attachments);
reply = xcb_dri2_get_buffers_reply (dri2_dpy->conn, cookie, NULL);
+ if (reply == NULL)
+ return NULL;
buffers = xcb_dri2_get_buffers_buffers (reply);
if (buffers == NULL)
return NULL;
@@ -870,7 +872,12 @@ dri2_x11_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw)
struct dri2_egl_surface *dri2_surf = dri2_egl_surface(draw);
if (dri2_dpy->dri2) {
- return dri2_x11_swap_buffers_msc(drv, disp, draw, 0, 0, 0) != -1;
+ if (dri2_x11_swap_buffers_msc(drv, disp, draw, 0, 0, 0) != -1) {
+ return EGL_TRUE;
+ }
+ /* Swap failed with a window drawable. */
+ _eglError(EGL_BAD_NATIVE_WINDOW, __FUNCTION__);
+ return EGL_FALSE;
} else {
assert(dri2_dpy->swrast);
diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c
index 323634e4511..32f68233aeb 100644
--- a/src/egl/main/eglapi.c
+++ b/src/egl/main/eglapi.c
@@ -1555,8 +1555,14 @@ eglGetSyncAttrib(EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLAttrib *valu
static EGLBoolean EGLAPIENTRY
eglGetSyncAttribKHR(EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLint *value)
{
- EGLAttrib attrib = *value;
- EGLBoolean result = eglGetSyncAttrib(dpy, sync, attribute, &attrib);
+ EGLAttrib attrib;
+ EGLBoolean result;
+
+ if (!value)
+ RETURN_EGL_ERROR(NULL, EGL_BAD_PARAMETER, EGL_FALSE);
+
+ attrib = *value;
+ result = eglGetSyncAttrib(dpy, sync, attribute, &attrib);
/* The EGL_KHR_fence_sync spec says this about eglGetSyncAttribKHR:
*
diff --git a/src/egl/main/eglsync.c b/src/egl/main/eglsync.c
index 3019e6e9333..999cb480c4b 100644
--- a/src/egl/main/eglsync.c
+++ b/src/egl/main/eglsync.c
@@ -144,9 +144,6 @@ EGLBoolean
_eglGetSyncAttrib(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
EGLint attribute, EGLAttrib *value)
{
- if (!value)
- return _eglError(EGL_BAD_PARAMETER, "eglGetSyncAttribKHR");
-
switch (attribute) {
case EGL_SYNC_TYPE_KHR:
*value = sync->Type;
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
index 6b29b20c53e..f0013f70472 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -69,8 +69,11 @@ struct cso_context {
boolean has_geometry_shader;
boolean has_tessellation;
+ boolean has_compute_shader;
boolean has_streamout;
+ unsigned saved_state; /**< bitmask of CSO_BIT_x flags */
+
struct pipe_sampler_view *fragment_views[PIPE_MAX_SHADER_SAMPLER_VIEWS];
unsigned nr_fragment_views;
@@ -106,6 +109,7 @@ struct cso_context {
void *geometry_shader, *geometry_shader_saved;
void *tessctrl_shader, *tessctrl_shader_saved;
void *tesseval_shader, *tesseval_shader_saved;
+ void *compute_shader;
void *velements, *velements_saved;
struct pipe_query *render_condition, *render_condition_saved;
uint render_condition_mode, render_condition_mode_saved;
@@ -272,6 +276,15 @@ struct cso_context *cso_create_context( struct pipe_context *pipe )
PIPE_SHADER_CAP_MAX_INSTRUCTIONS) > 0) {
ctx->has_tessellation = TRUE;
}
+ if (pipe->screen->get_shader_param(pipe->screen, PIPE_SHADER_COMPUTE,
+ PIPE_SHADER_CAP_MAX_INSTRUCTIONS) > 0) {
+ int supported_irs =
+ pipe->screen->get_shader_param(pipe->screen, PIPE_SHADER_COMPUTE,
+ PIPE_SHADER_CAP_SUPPORTED_IRS);
+ if (supported_irs & (1 << PIPE_SHADER_IR_TGSI)) {
+ ctx->has_compute_shader = TRUE;
+ }
+ }
if (pipe->screen->get_param(pipe->screen,
PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS) != 0) {
ctx->has_streamout = TRUE;
@@ -333,6 +346,10 @@ void cso_destroy_context( struct cso_context *ctx )
ctx->pipe->bind_tes_state(ctx->pipe, NULL);
ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_TESS_EVAL, 0, NULL);
}
+ if (ctx->has_compute_shader) {
+ ctx->pipe->bind_compute_state(ctx->pipe, NULL);
+ ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_COMPUTE, 0, NULL);
+ }
ctx->pipe->bind_vertex_elements_state( ctx->pipe, NULL );
if (ctx->has_streamout)
@@ -425,13 +442,15 @@ enum pipe_error cso_set_blend(struct cso_context *ctx,
return PIPE_OK;
}
-void cso_save_blend(struct cso_context *ctx)
+static void
+cso_save_blend(struct cso_context *ctx)
{
assert(!ctx->blend_saved);
ctx->blend_saved = ctx->blend;
}
-void cso_restore_blend(struct cso_context *ctx)
+static void
+cso_restore_blend(struct cso_context *ctx)
{
if (ctx->blend != ctx->blend_saved) {
ctx->blend = ctx->blend_saved;
@@ -488,13 +507,15 @@ cso_set_depth_stencil_alpha(struct cso_context *ctx,
return PIPE_OK;
}
-void cso_save_depth_stencil_alpha(struct cso_context *ctx)
+static void
+cso_save_depth_stencil_alpha(struct cso_context *ctx)
{
assert(!ctx->depth_stencil_saved);
ctx->depth_stencil_saved = ctx->depth_stencil;
}
-void cso_restore_depth_stencil_alpha(struct cso_context *ctx)
+static void
+cso_restore_depth_stencil_alpha(struct cso_context *ctx)
{
if (ctx->depth_stencil != ctx->depth_stencil_saved) {
ctx->depth_stencil = ctx->depth_stencil_saved;
@@ -547,13 +568,15 @@ enum pipe_error cso_set_rasterizer(struct cso_context *ctx,
return PIPE_OK;
}
-void cso_save_rasterizer(struct cso_context *ctx)
+static void
+cso_save_rasterizer(struct cso_context *ctx)
{
assert(!ctx->rasterizer_saved);
ctx->rasterizer_saved = ctx->rasterizer;
}
-void cso_restore_rasterizer(struct cso_context *ctx)
+static void
+cso_restore_rasterizer(struct cso_context *ctx)
{
if (ctx->rasterizer != ctx->rasterizer_saved) {
ctx->rasterizer = ctx->rasterizer_saved;
@@ -581,13 +604,15 @@ void cso_delete_fragment_shader(struct cso_context *ctx, void *handle )
ctx->pipe->delete_fs_state(ctx->pipe, handle);
}
-void cso_save_fragment_shader(struct cso_context *ctx)
+static void
+cso_save_fragment_shader(struct cso_context *ctx)
{
assert(!ctx->fragment_shader_saved);
ctx->fragment_shader_saved = ctx->fragment_shader;
}
-void cso_restore_fragment_shader(struct cso_context *ctx)
+static void
+cso_restore_fragment_shader(struct cso_context *ctx)
{
if (ctx->fragment_shader_saved != ctx->fragment_shader) {
ctx->pipe->bind_fs_state(ctx->pipe, ctx->fragment_shader_saved);
@@ -615,13 +640,15 @@ void cso_delete_vertex_shader(struct cso_context *ctx, void *handle )
ctx->pipe->delete_vs_state(ctx->pipe, handle);
}
-void cso_save_vertex_shader(struct cso_context *ctx)
+static void
+cso_save_vertex_shader(struct cso_context *ctx)
{
assert(!ctx->vertex_shader_saved);
ctx->vertex_shader_saved = ctx->vertex_shader;
}
-void cso_restore_vertex_shader(struct cso_context *ctx)
+static void
+cso_restore_vertex_shader(struct cso_context *ctx)
{
if (ctx->vertex_shader_saved != ctx->vertex_shader) {
ctx->pipe->bind_vs_state(ctx->pipe, ctx->vertex_shader_saved);
@@ -640,12 +667,14 @@ void cso_set_framebuffer(struct cso_context *ctx,
}
}
-void cso_save_framebuffer(struct cso_context *ctx)
+static void
+cso_save_framebuffer(struct cso_context *ctx)
{
util_copy_framebuffer_state(&ctx->fb_saved, &ctx->fb);
}
-void cso_restore_framebuffer(struct cso_context *ctx)
+static void
+cso_restore_framebuffer(struct cso_context *ctx)
{
if (memcmp(&ctx->fb, &ctx->fb_saved, sizeof(ctx->fb))) {
util_copy_framebuffer_state(&ctx->fb, &ctx->fb_saved);
@@ -664,13 +693,33 @@ void cso_set_viewport(struct cso_context *ctx,
}
}
-void cso_save_viewport(struct cso_context *ctx)
+/**
+ * Setup viewport state for given width and height (position is always (0,0)).
+ * Invert the Y axis if 'invert' is true.
+ */
+void
+cso_set_viewport_dims(struct cso_context *ctx,
+ float width, float height, boolean invert)
+{
+ struct pipe_viewport_state vp;
+ vp.scale[0] = width * 0.5f;
+ vp.scale[1] = height * (invert ? -0.5f : 0.5f);
+ vp.scale[2] = 0.5f;
+ vp.translate[0] = 0.5f * width;
+ vp.translate[1] = 0.5f * height;
+ vp.translate[2] = 0.5f;
+ cso_set_viewport(ctx, &vp);
+}
+
+static void
+cso_save_viewport(struct cso_context *ctx)
{
ctx->vp_saved = ctx->vp;
}
-void cso_restore_viewport(struct cso_context *ctx)
+static void
+cso_restore_viewport(struct cso_context *ctx)
{
if (memcmp(&ctx->vp, &ctx->vp_saved, sizeof(ctx->vp))) {
ctx->vp = ctx->vp_saved;
@@ -696,12 +745,14 @@ void cso_set_sample_mask(struct cso_context *ctx, unsigned sample_mask)
}
}
-void cso_save_sample_mask(struct cso_context *ctx)
+static void
+cso_save_sample_mask(struct cso_context *ctx)
{
ctx->sample_mask_saved = ctx->sample_mask;
}
-void cso_restore_sample_mask(struct cso_context *ctx)
+static void
+cso_restore_sample_mask(struct cso_context *ctx)
{
cso_set_sample_mask(ctx, ctx->sample_mask_saved);
}
@@ -714,12 +765,14 @@ void cso_set_min_samples(struct cso_context *ctx, unsigned min_samples)
}
}
-void cso_save_min_samples(struct cso_context *ctx)
+static void
+cso_save_min_samples(struct cso_context *ctx)
{
ctx->min_samples_saved = ctx->min_samples;
}
-void cso_restore_min_samples(struct cso_context *ctx)
+static void
+cso_restore_min_samples(struct cso_context *ctx)
{
cso_set_min_samples(ctx, ctx->min_samples_saved);
}
@@ -733,13 +786,15 @@ void cso_set_stencil_ref(struct cso_context *ctx,
}
}
-void cso_save_stencil_ref(struct cso_context *ctx)
+static void
+cso_save_stencil_ref(struct cso_context *ctx)
{
ctx->stencil_ref_saved = ctx->stencil_ref;
}
-void cso_restore_stencil_ref(struct cso_context *ctx)
+static void
+cso_restore_stencil_ref(struct cso_context *ctx)
{
if (memcmp(&ctx->stencil_ref, &ctx->stencil_ref_saved,
sizeof(ctx->stencil_ref))) {
@@ -764,14 +819,16 @@ void cso_set_render_condition(struct cso_context *ctx,
}
}
-void cso_save_render_condition(struct cso_context *ctx)
+static void
+cso_save_render_condition(struct cso_context *ctx)
{
ctx->render_condition_saved = ctx->render_condition;
ctx->render_condition_cond_saved = ctx->render_condition_cond;
ctx->render_condition_mode_saved = ctx->render_condition_mode;
}
-void cso_restore_render_condition(struct cso_context *ctx)
+static void
+cso_restore_render_condition(struct cso_context *ctx)
{
cso_set_render_condition(ctx, ctx->render_condition_saved,
ctx->render_condition_cond_saved,
@@ -798,7 +855,8 @@ void cso_delete_geometry_shader(struct cso_context *ctx, void *handle)
ctx->pipe->delete_gs_state(ctx->pipe, handle);
}
-void cso_save_geometry_shader(struct cso_context *ctx)
+static void
+cso_save_geometry_shader(struct cso_context *ctx)
{
if (!ctx->has_geometry_shader) {
return;
@@ -808,7 +866,8 @@ void cso_save_geometry_shader(struct cso_context *ctx)
ctx->geometry_shader_saved = ctx->geometry_shader;
}
-void cso_restore_geometry_shader(struct cso_context *ctx)
+static void
+cso_restore_geometry_shader(struct cso_context *ctx)
{
if (!ctx->has_geometry_shader) {
return;
@@ -841,7 +900,8 @@ void cso_delete_tessctrl_shader(struct cso_context *ctx, void *handle)
ctx->pipe->delete_tcs_state(ctx->pipe, handle);
}
-void cso_save_tessctrl_shader(struct cso_context *ctx)
+static void
+cso_save_tessctrl_shader(struct cso_context *ctx)
{
if (!ctx->has_tessellation) {
return;
@@ -851,7 +911,8 @@ void cso_save_tessctrl_shader(struct cso_context *ctx)
ctx->tessctrl_shader_saved = ctx->tessctrl_shader;
}
-void cso_restore_tessctrl_shader(struct cso_context *ctx)
+static void
+cso_restore_tessctrl_shader(struct cso_context *ctx)
{
if (!ctx->has_tessellation) {
return;
@@ -884,7 +945,8 @@ void cso_delete_tesseval_shader(struct cso_context *ctx, void *handle)
ctx->pipe->delete_tes_state(ctx->pipe, handle);
}
-void cso_save_tesseval_shader(struct cso_context *ctx)
+static void
+cso_save_tesseval_shader(struct cso_context *ctx)
{
if (!ctx->has_tessellation) {
return;
@@ -894,7 +956,8 @@ void cso_save_tesseval_shader(struct cso_context *ctx)
ctx->tesseval_shader_saved = ctx->tesseval_shader;
}
-void cso_restore_tesseval_shader(struct cso_context *ctx)
+static void
+cso_restore_tesseval_shader(struct cso_context *ctx)
{
if (!ctx->has_tessellation) {
return;
@@ -907,6 +970,26 @@ void cso_restore_tesseval_shader(struct cso_context *ctx)
ctx->tesseval_shader_saved = NULL;
}
+void cso_set_compute_shader_handle(struct cso_context *ctx, void *handle) /* Bind `handle` as the current compute shader, skipping the driver call when it is already the cached one. */
+{
+ assert(ctx->has_compute_shader || !handle); /* a non-NULL handle is only accepted when has_compute_shader is set */
+
+ if (ctx->has_compute_shader && ctx->compute_shader != handle) { /* no-op without compute support or when the handle is unchanged */
+ ctx->compute_shader = handle; /* update the cache before telling the pipe */
+ ctx->pipe->bind_compute_state(ctx->pipe, handle);
+ }
+}
+
+void cso_delete_compute_shader(struct cso_context *ctx, void *handle) /* Delete a compute shader handle through the pipe, unbinding it first if it is the cached current one. */
+{
+ if (handle == ctx->compute_shader) {
+ /* unbind before deleting: bind NULL on the pipe and clear the cached
+ * handle so ctx->compute_shader never refers to the deleted object.
+ * NOTE(review): unlike cso_set_compute_shader_handle() this path does
+ * not test ctx->has_compute_shader - confirm callers only reach it on
+ * pipes that implement the compute hooks. */
+ ctx->pipe->bind_compute_state(ctx->pipe, NULL);
+ ctx->compute_shader = NULL;
+ }
+ ctx->pipe->delete_compute_state(ctx->pipe, handle); /* always forwarded, whether or not the handle was bound */
+}
+
enum pipe_error
cso_set_vertex_elements(struct cso_context *ctx,
unsigned count,
@@ -967,7 +1050,8 @@ cso_set_vertex_elements(struct cso_context *ctx,
return PIPE_OK;
}
-void cso_save_vertex_elements(struct cso_context *ctx)
+static void
+cso_save_vertex_elements(struct cso_context *ctx)
{
struct u_vbuf *vbuf = ctx->vbuf;
@@ -980,7 +1064,8 @@ void cso_save_vertex_elements(struct cso_context *ctx)
ctx->velements_saved = ctx->velements;
}
-void cso_restore_vertex_elements(struct cso_context *ctx)
+static void
+cso_restore_vertex_elements(struct cso_context *ctx)
{
struct u_vbuf *vbuf = ctx->vbuf;
@@ -1032,7 +1117,8 @@ void cso_set_vertex_buffers(struct cso_context *ctx,
ctx->pipe->set_vertex_buffers(ctx->pipe, start_slot, count, buffers);
}
-void cso_save_aux_vertex_buffer_slot(struct cso_context *ctx)
+static void
+cso_save_aux_vertex_buffer_slot(struct cso_context *ctx)
{
struct u_vbuf *vbuf = ctx->vbuf;
@@ -1047,7 +1133,8 @@ void cso_save_aux_vertex_buffer_slot(struct cso_context *ctx)
sizeof(struct pipe_vertex_buffer));
}
-void cso_restore_aux_vertex_buffer_slot(struct cso_context *ctx)
+static void
+cso_restore_aux_vertex_buffer_slot(struct cso_context *ctx)
{
struct u_vbuf *vbuf = ctx->vbuf;
@@ -1165,7 +1252,7 @@ cso_set_samplers(struct cso_context *ctx,
return error;
}
-void
+static void
cso_save_fragment_samplers(struct cso_context *ctx)
{
struct sampler_info *info = &ctx->samplers[PIPE_SHADER_FRAGMENT];
@@ -1176,7 +1263,7 @@ cso_save_fragment_samplers(struct cso_context *ctx)
}
-void
+static void
cso_restore_fragment_samplers(struct cso_context *ctx)
{
struct sampler_info *info = &ctx->samplers[PIPE_SHADER_FRAGMENT];
@@ -1223,7 +1310,7 @@ cso_set_sampler_views(struct cso_context *ctx,
}
-void
+static void
cso_save_fragment_sampler_views(struct cso_context *ctx)
{
unsigned i;
@@ -1238,7 +1325,7 @@ cso_save_fragment_sampler_views(struct cso_context *ctx)
}
-void
+static void
cso_restore_fragment_sampler_views(struct cso_context *ctx)
{
unsigned i, nr_saved = ctx->nr_fragment_views_saved;
@@ -1298,7 +1385,7 @@ cso_set_stream_outputs(struct cso_context *ctx,
ctx->nr_so_targets = num_targets;
}
-void
+static void
cso_save_stream_outputs(struct cso_context *ctx)
{
uint i;
@@ -1315,7 +1402,7 @@ cso_save_stream_outputs(struct cso_context *ctx)
}
}
-void
+static void
cso_restore_stream_outputs(struct cso_context *ctx)
{
struct pipe_context *pipe = ctx->pipe;
@@ -1402,6 +1489,113 @@ cso_restore_constant_buffer_slot0(struct cso_context *cso,
NULL);
}
+
+/**
+ * Save all the CSO state items specified by the state_mask bitmask
+ * of CSO_BIT_x flags.
+ *
+ * The mask is recorded in cso->saved_state so that cso_restore_state()
+ * knows which items to put back. Only one save may be outstanding at a
+ * time: the function asserts that cso->saved_state is still 0 on entry.
+ * Passing a state_mask of 0 leaves saved_state at 0, which the assert in
+ * cso_restore_state() then rejects.
+ *
+ * The per-item helpers run in the fixed order written below, regardless
+ * of how the caller ORed the bits together. Constant buffer slot 0 has
+ * no CSO_BIT flag; callers save it separately with
+ * cso_save_constant_buffer_slot0().
+ *
+ * \param cso         the CSO context whose state is saved
+ * \param state_mask  bitwise OR of CSO_BIT_x flags selecting what to save
+ */
+void
+cso_save_state(struct cso_context *cso, unsigned state_mask)
+{
+ assert(cso->saved_state == 0);
+
+ cso->saved_state = state_mask;
+
+ if (state_mask & CSO_BIT_AUX_VERTEX_BUFFER_SLOT)
+ cso_save_aux_vertex_buffer_slot(cso);
+ if (state_mask & CSO_BIT_BLEND)
+ cso_save_blend(cso);
+ if (state_mask & CSO_BIT_DEPTH_STENCIL_ALPHA)
+ cso_save_depth_stencil_alpha(cso);
+ if (state_mask & CSO_BIT_FRAGMENT_SAMPLERS)
+ cso_save_fragment_samplers(cso);
+ if (state_mask & CSO_BIT_FRAGMENT_SAMPLER_VIEWS)
+ cso_save_fragment_sampler_views(cso);
+ if (state_mask & CSO_BIT_FRAGMENT_SHADER)
+ cso_save_fragment_shader(cso);
+ if (state_mask & CSO_BIT_FRAMEBUFFER)
+ cso_save_framebuffer(cso);
+ if (state_mask & CSO_BIT_GEOMETRY_SHADER)
+ cso_save_geometry_shader(cso);
+ if (state_mask & CSO_BIT_MIN_SAMPLES)
+ cso_save_min_samples(cso);
+ if (state_mask & CSO_BIT_RASTERIZER)
+ cso_save_rasterizer(cso);
+ if (state_mask & CSO_BIT_RENDER_CONDITION)
+ cso_save_render_condition(cso);
+ if (state_mask & CSO_BIT_SAMPLE_MASK)
+ cso_save_sample_mask(cso);
+ if (state_mask & CSO_BIT_STENCIL_REF)
+ cso_save_stencil_ref(cso);
+ if (state_mask & CSO_BIT_STREAM_OUTPUTS)
+ cso_save_stream_outputs(cso);
+ if (state_mask & CSO_BIT_TESSCTRL_SHADER)
+ cso_save_tessctrl_shader(cso);
+ if (state_mask & CSO_BIT_TESSEVAL_SHADER)
+ cso_save_tesseval_shader(cso);
+ if (state_mask & CSO_BIT_VERTEX_ELEMENTS)
+ cso_save_vertex_elements(cso);
+ if (state_mask & CSO_BIT_VERTEX_SHADER)
+ cso_save_vertex_shader(cso);
+ if (state_mask & CSO_BIT_VIEWPORT)
+ cso_save_viewport(cso);
+}
+
+
+/**
+ * Restore the state which was saved by cso_save_state().
+ *
+ * The set of items to restore is read from cso->saved_state, i.e. the
+ * mask given to the matching cso_save_state() call; the function asserts
+ * that this mask is non-zero. Items are restored in the fixed order
+ * written below. cso->saved_state is cleared at the end so that a new
+ * cso_save_state() call passes its own assert.
+ *
+ * Constant buffer slot 0 is not handled here; callers restore it with
+ * cso_restore_constant_buffer_slot0().
+ *
+ * \param cso  the CSO context whose saved state is restored
+ */
+void
+cso_restore_state(struct cso_context *cso)
+{
+ unsigned state_mask = cso->saved_state;
+
+ assert(state_mask);
+
+ if (state_mask & CSO_BIT_AUX_VERTEX_BUFFER_SLOT)
+ cso_restore_aux_vertex_buffer_slot(cso);
+ if (state_mask & CSO_BIT_BLEND)
+ cso_restore_blend(cso);
+ if (state_mask & CSO_BIT_DEPTH_STENCIL_ALPHA)
+ cso_restore_depth_stencil_alpha(cso);
+ if (state_mask & CSO_BIT_FRAGMENT_SAMPLERS)
+ cso_restore_fragment_samplers(cso);
+ if (state_mask & CSO_BIT_FRAGMENT_SAMPLER_VIEWS)
+ cso_restore_fragment_sampler_views(cso);
+ if (state_mask & CSO_BIT_FRAGMENT_SHADER)
+ cso_restore_fragment_shader(cso);
+ if (state_mask & CSO_BIT_FRAMEBUFFER)
+ cso_restore_framebuffer(cso);
+ if (state_mask & CSO_BIT_GEOMETRY_SHADER)
+ cso_restore_geometry_shader(cso);
+ if (state_mask & CSO_BIT_MIN_SAMPLES)
+ cso_restore_min_samples(cso);
+ if (state_mask & CSO_BIT_RASTERIZER)
+ cso_restore_rasterizer(cso);
+ if (state_mask & CSO_BIT_RENDER_CONDITION)
+ cso_restore_render_condition(cso);
+ if (state_mask & CSO_BIT_SAMPLE_MASK)
+ cso_restore_sample_mask(cso);
+ if (state_mask & CSO_BIT_STENCIL_REF)
+ cso_restore_stencil_ref(cso);
+ if (state_mask & CSO_BIT_STREAM_OUTPUTS)
+ cso_restore_stream_outputs(cso);
+ if (state_mask & CSO_BIT_TESSCTRL_SHADER)
+ cso_restore_tessctrl_shader(cso);
+ if (state_mask & CSO_BIT_TESSEVAL_SHADER)
+ cso_restore_tesseval_shader(cso);
+ if (state_mask & CSO_BIT_VERTEX_ELEMENTS)
+ cso_restore_vertex_elements(cso);
+ if (state_mask & CSO_BIT_VERTEX_SHADER)
+ cso_restore_vertex_shader(cso);
+ if (state_mask & CSO_BIT_VIEWPORT)
+ cso_restore_viewport(cso);
+
+ cso->saved_state = 0;
+}
+
+
+
/* drawing */
void
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h
index f0a27390d17..a3563d83a02 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.h
+++ b/src/gallium/auxiliary/cso_cache/cso_context.h
@@ -47,22 +47,15 @@ void cso_destroy_context( struct cso_context *cso );
enum pipe_error cso_set_blend( struct cso_context *cso,
const struct pipe_blend_state *blend );
-void cso_save_blend(struct cso_context *cso);
-void cso_restore_blend(struct cso_context *cso);
-
enum pipe_error cso_set_depth_stencil_alpha( struct cso_context *cso,
const struct pipe_depth_stencil_alpha_state *dsa );
-void cso_save_depth_stencil_alpha(struct cso_context *cso);
-void cso_restore_depth_stencil_alpha(struct cso_context *cso);
enum pipe_error cso_set_rasterizer( struct cso_context *cso,
const struct pipe_rasterizer_state *rasterizer );
-void cso_save_rasterizer(struct cso_context *cso);
-void cso_restore_rasterizer(struct cso_context *cso);
enum pipe_error
@@ -71,11 +64,6 @@ cso_set_samplers(struct cso_context *cso,
unsigned count,
const struct pipe_sampler_state **states);
-void
-cso_save_fragment_samplers(struct cso_context *cso);
-
-void
-cso_restore_fragment_samplers(struct cso_context *cso);
/* Alternate interface to support state trackers that like to modify
* samplers one at a time:
@@ -91,9 +79,6 @@ cso_single_sampler_done(struct cso_context *cso, unsigned shader_stage);
enum pipe_error cso_set_vertex_elements(struct cso_context *ctx,
unsigned count,
const struct pipe_vertex_element *states);
-void cso_save_vertex_elements(struct cso_context *ctx);
-void cso_restore_vertex_elements(struct cso_context *ctx);
-
void cso_set_vertex_buffers(struct cso_context *ctx,
unsigned start_slot, unsigned count,
@@ -101,8 +86,6 @@ void cso_set_vertex_buffers(struct cso_context *ctx,
/* One vertex buffer slot is provided with the save/restore functionality.
* cso_context chooses the slot, it can be non-zero. */
-void cso_save_aux_vertex_buffer_slot(struct cso_context *ctx);
-void cso_restore_aux_vertex_buffer_slot(struct cso_context *ctx);
unsigned cso_get_aux_vertex_buffer_slot(struct cso_context *ctx);
@@ -110,8 +93,6 @@ void cso_set_stream_outputs(struct cso_context *ctx,
unsigned num_targets,
struct pipe_stream_output_target **targets,
const unsigned *offsets);
-void cso_save_stream_outputs(struct cso_context *ctx);
-void cso_restore_stream_outputs(struct cso_context *ctx);
/*
@@ -123,67 +104,81 @@ void cso_restore_stream_outputs(struct cso_context *ctx);
void cso_set_fragment_shader_handle(struct cso_context *ctx, void *handle);
void cso_delete_fragment_shader(struct cso_context *ctx, void *handle );
-void cso_save_fragment_shader(struct cso_context *cso);
-void cso_restore_fragment_shader(struct cso_context *cso);
void cso_set_vertex_shader_handle(struct cso_context *ctx, void *handle);
void cso_delete_vertex_shader(struct cso_context *ctx, void *handle );
-void cso_save_vertex_shader(struct cso_context *cso);
-void cso_restore_vertex_shader(struct cso_context *cso);
void cso_set_geometry_shader_handle(struct cso_context *ctx, void *handle);
void cso_delete_geometry_shader(struct cso_context *ctx, void *handle);
-void cso_save_geometry_shader(struct cso_context *cso);
-void cso_restore_geometry_shader(struct cso_context *cso);
void cso_set_tessctrl_shader_handle(struct cso_context *ctx, void *handle);
void cso_delete_tessctrl_shader(struct cso_context *ctx, void *handle);
-void cso_save_tessctrl_shader(struct cso_context *cso);
-void cso_restore_tessctrl_shader(struct cso_context *cso);
void cso_set_tesseval_shader_handle(struct cso_context *ctx, void *handle);
void cso_delete_tesseval_shader(struct cso_context *ctx, void *handle);
-void cso_save_tesseval_shader(struct cso_context *cso);
-void cso_restore_tesseval_shader(struct cso_context *cso);
+
+
+void cso_set_compute_shader_handle(struct cso_context *ctx, void *handle);
+void cso_delete_compute_shader(struct cso_context *ctx, void *handle);
void cso_set_framebuffer(struct cso_context *cso,
const struct pipe_framebuffer_state *fb);
-void cso_save_framebuffer(struct cso_context *cso);
-void cso_restore_framebuffer(struct cso_context *cso);
void cso_set_viewport(struct cso_context *cso,
const struct pipe_viewport_state *vp);
-void cso_save_viewport(struct cso_context *cso);
-void cso_restore_viewport(struct cso_context *cso);
+void cso_set_viewport_dims(struct cso_context *ctx,
+ float width, float height, boolean invert);
void cso_set_blend_color(struct cso_context *cso,
const struct pipe_blend_color *bc);
void cso_set_sample_mask(struct cso_context *cso, unsigned stencil_mask);
-void cso_save_sample_mask(struct cso_context *ctx);
-void cso_restore_sample_mask(struct cso_context *ctx);
void cso_set_min_samples(struct cso_context *cso, unsigned min_samples);
-void cso_save_min_samples(struct cso_context *ctx);
-void cso_restore_min_samples(struct cso_context *ctx);
void cso_set_stencil_ref(struct cso_context *cso,
const struct pipe_stencil_ref *sr);
-void cso_save_stencil_ref(struct cso_context *cso);
-void cso_restore_stencil_ref(struct cso_context *cso);
void cso_set_render_condition(struct cso_context *cso,
struct pipe_query *query,
boolean condition, uint mode);
-void cso_save_render_condition(struct cso_context *cso);
-void cso_restore_render_condition(struct cso_context *cso);
+
+
+#define CSO_BIT_AUX_VERTEX_BUFFER_SLOT 0x1 /* CSO_BIT_x: one distinct bit per saveable state item; OR them together to form the state_mask given to cso_save_state() */
+#define CSO_BIT_BLEND 0x2
+#define CSO_BIT_DEPTH_STENCIL_ALPHA 0x4
+#define CSO_BIT_FRAGMENT_SAMPLERS 0x8
+#define CSO_BIT_FRAGMENT_SAMPLER_VIEWS 0x10
+#define CSO_BIT_FRAGMENT_SHADER 0x20
+#define CSO_BIT_FRAMEBUFFER 0x40
+#define CSO_BIT_GEOMETRY_SHADER 0x80
+#define CSO_BIT_MIN_SAMPLES 0x100
+#define CSO_BIT_RASTERIZER 0x200
+#define CSO_BIT_RENDER_CONDITION 0x400
+#define CSO_BIT_SAMPLE_MASK 0x800
+#define CSO_BIT_STENCIL_REF 0x1000
+#define CSO_BIT_STREAM_OUTPUTS 0x2000
+#define CSO_BIT_TESSCTRL_SHADER 0x4000
+#define CSO_BIT_TESSEVAL_SHADER 0x8000
+#define CSO_BIT_VERTEX_ELEMENTS 0x10000
+#define CSO_BIT_VERTEX_SHADER 0x20000
+#define CSO_BIT_VIEWPORT 0x40000 /* highest bit currently assigned */
+
+#define CSO_BITS_ALL_SHADERS (CSO_BIT_VERTEX_SHADER | \
+ CSO_BIT_FRAGMENT_SHADER | \
+ CSO_BIT_GEOMETRY_SHADER | \
+ CSO_BIT_TESSCTRL_SHADER | \
+ CSO_BIT_TESSEVAL_SHADER) /* convenience mask: the five shader stages that have a CSO_BIT flag; compute has none in this list */
+
+void cso_save_state(struct cso_context *cso, unsigned state_mask);
+void cso_restore_state(struct cso_context *cso);
/* sampler view state */
@@ -194,12 +189,6 @@ cso_set_sampler_views(struct cso_context *cso,
unsigned count,
struct pipe_sampler_view **views);
-void
-cso_save_fragment_sampler_views(struct cso_context *ctx);
-
-void
-cso_restore_fragment_sampler_views(struct cso_context *ctx);
-
/* constant buffers */
@@ -230,7 +219,6 @@ cso_draw_arrays_instanced(struct cso_context *cso, uint mode,
uint start, uint count,
uint start_instance, uint instance_count);
-/* helper drawing function */
void
cso_draw_arrays(struct cso_context *cso, uint mode, uint start, uint count);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
index 7283e2f162f..efaf2fa306a 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
@@ -26,6 +26,9 @@
**************************************************************************/
#include <stddef.h>
+#include <fstream>
+#include <sstream>
+#include <iomanip>
#include <llvm-c/Core.h>
#include <llvm-c/Disassembler.h>
@@ -125,7 +128,7 @@ lp_debug_dump_value(LLVMValueRef value)
* - http://blog.llvm.org/2010/04/intro-to-llvm-mc-project.html
*/
static size_t
-disassemble(const void* func)
+disassemble(const void* func, std::stringstream &buffer)
{
const uint8_t *bytes = (const uint8_t *)func;
@@ -143,8 +146,8 @@ disassemble(const void* func)
char outline[1024];
if (!D) {
- _debug_printf("error: couldn't create disassembler for triple %s\n",
- Triple.c_str());
+ buffer << "error: could not create disassembler for triple "
+ << Triple.c_str() << '\n';
return 0;
}
@@ -158,13 +161,13 @@ disassemble(const void* func)
* so that between runs.
*/
- _debug_printf("%6lu:\t", (unsigned long)pc);
+ buffer << std::setw(6) << (unsigned long)pc << ":\t";
Size = LLVMDisasmInstruction(D, (uint8_t *)bytes + pc, extent - pc, 0, outline,
sizeof outline);
if (!Size) {
- _debug_printf("invalid\n");
+ buffer << "invalid\n";
pc += 1;
break;
}
@@ -176,10 +179,11 @@ disassemble(const void* func)
if (0) {
unsigned i;
for (i = 0; i < Size; ++i) {
- _debug_printf("%02x ", bytes[pc + i]);
+ buffer << std::hex << std::setfill('0') << std::setw(2) /* two zero-padded hex digits per byte, as "%02x" did */
+ << static_cast<int> (bytes[pc + i]) << ' ' << std::dec << std::setfill(' '); /* keep the separator from "%02x "; hex and fill are sticky on the stream, so reset them before the setw(6) pc / setw(Size) outline output */
}
for (; i < 16; ++i) {
- _debug_printf(" ");
+ buffer << std::dec << " ";
}
}
@@ -187,9 +191,7 @@ disassemble(const void* func)
* Print the instruction.
*/
- _debug_printf("%*s", Size, outline);
-
- _debug_printf("\n");
+ buffer << std::setw(Size) << outline << '\n';
/*
* Stop disassembling on return statements, if there is no record of a
@@ -198,9 +200,11 @@ disassemble(const void* func)
* XXX: This currently assumes x86
*/
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
if (Size == 1 && bytes[pc] == 0xc3) {
break;
}
+#endif
/*
* Advance.
@@ -209,12 +213,12 @@ disassemble(const void* func)
pc += Size;
if (pc >= extent) {
- _debug_printf("disassembly larger than %ull bytes, aborting\n", extent);
+ buffer << "disassembly larger than " << extent << " bytes, aborting\n";
break;
}
}
- _debug_printf("\n");
+ buffer << '\n';
LLVMDisasmDispose(D);
@@ -222,7 +226,8 @@ disassemble(const void* func)
* Print GDB command, useful to verify output.
*/
if (0) {
- _debug_printf("disassemble %p %p\n", bytes, bytes + pc);
+ buffer << "disassemble " << static_cast<const void*>(bytes) << ' '
+ << static_cast<const void*>(bytes + pc) << '\n';
}
return pc;
@@ -231,8 +236,14 @@ disassemble(const void* func)
extern "C" void
lp_disassemble(LLVMValueRef func, const void *code) {
- _debug_printf("%s:\n", LLVMGetValueName(func));
- disassemble(code);
+ std::stringstream buffer; /* collects the "<name>:" header plus the whole listing before anything is printed */
+ std::string s;
+
+ buffer << LLVMGetValueName(func) << ":\n";
+ disassemble(code, buffer); /* return value (bytes disassembled) is not needed here */
+ s = buffer.str();
+ _debug_printf("%s", s.c_str()); /* listing goes out in one call, passed as a "%s" argument rather than as the format string. NOTE(review): confirm _debug_printf copes with a string of this length. */
+ _debug_printf("\n");
}
@@ -248,9 +259,10 @@ extern "C" void
lp_profile(LLVMValueRef func, const void *code)
{
#if defined(__linux__) && defined(PROFILE)
+ std::stringstream buffer;
+ static std::ofstream perf_asm_file;
static boolean first_time = TRUE;
static FILE *perf_map_file = NULL;
- static int perf_asm_fd = -1;
if (first_time) {
/*
* We rely on the disassembler for determining a function's size, but
@@ -264,17 +276,16 @@ lp_profile(LLVMValueRef func, const void *code)
util_snprintf(filename, sizeof filename, "/tmp/perf-%llu.map", (unsigned long long)pid);
perf_map_file = fopen(filename, "wt");
util_snprintf(filename, sizeof filename, "/tmp/perf-%llu.map.asm", (unsigned long long)pid);
- mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
- perf_asm_fd = open(filename, O_WRONLY | O_CREAT, mode);
+ perf_asm_file.open(filename);
}
first_time = FALSE;
}
if (perf_map_file) {
const char *symbol = LLVMGetValueName(func);
unsigned long addr = (uintptr_t)code;
- llvm::raw_fd_ostream Out(perf_asm_fd, false);
- Out << symbol << ":\n";
- unsigned long size = disassemble(code);
+ buffer << symbol << ":\n";
+ unsigned long size = disassemble(code, buffer);
+ perf_asm_file << buffer.rdbuf() << std::flush;
fprintf(perf_map_file, "%lx %lx %s\n", addr, size, symbol);
fflush(perf_map_file);
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index 96aba7370c1..ab55be4c439 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -118,8 +118,10 @@ create_pass_manager(struct gallivm_state *gallivm)
* simple, or constant propagation into them, etc.
*/
+#if HAVE_LLVM < 0x0309
// Old versions of LLVM get the DataLayout from the pass manager.
LLVMAddTargetData(gallivm->target, gallivm->passmgr);
+#endif
/* Setting the module's DataLayout to an empty string will cause the
* ExecutionEngine to copy to the DataLayout string from its target
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
index 4598db851ae..32addec9724 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_limits.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
@@ -128,6 +128,8 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
return PIPE_MAX_SHADER_SAMPLER_VIEWS;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ return 1 << PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 1;
@@ -137,6 +139,7 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index 3ee708f4fad..30ef37c9d22 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -61,6 +61,11 @@
#include <llvm/Target/TargetOptions.h>
#include <llvm/ExecutionEngine/ExecutionEngine.h>
#include <llvm/ADT/Triple.h>
+#if HAVE_LLVM >= 0x0307
+#include <llvm/Analysis/TargetLibraryInfo.h>
+#else
+#include <llvm/Target/TargetLibraryInfo.h>
+#endif
#if HAVE_LLVM < 0x0306
#include <llvm/ExecutionEngine/JITMemoryManager.h>
#else
@@ -147,6 +152,31 @@ lp_set_target_options(void)
gallivm_init_llvm_targets();
}
+extern "C"
+LLVMTargetLibraryInfoRef
+gallivm_create_target_library_info(const char *triple) /* Allocates (with new) the LLVM library-info object for the given target triple string and returns it as an opaque C handle; release it with gallivm_dispose_target_library_info(). */
+{
+ return reinterpret_cast<LLVMTargetLibraryInfoRef>(
+#if HAVE_LLVM < 0x0307
+ new llvm::TargetLibraryInfo(
+#else /* HAVE_LLVM >= 0x0307: the constructed class is TargetLibraryInfoImpl */
+ new llvm::TargetLibraryInfoImpl(
+#endif
+ llvm::Triple(triple)));
+}
+
+extern "C"
+void
+gallivm_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info) /* Deletes a handle obtained from gallivm_create_target_library_info(); the cast below must name the same class that function allocated for this HAVE_LLVM version. */
+{
+ delete reinterpret_cast<
+#if HAVE_LLVM < 0x0307
+ llvm::TargetLibraryInfo
+#else /* HAVE_LLVM >= 0x0307 */
+ llvm::TargetLibraryInfoImpl
+#endif
+ *>(library_info);
+}
extern "C"
LLVMValueRef
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.h b/src/gallium/auxiliary/gallivm/lp_bld_misc.h
index 86d2f86ac45..30b7b1674af 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.h
@@ -32,6 +32,7 @@
#include "lp_bld.h"
#include <llvm-c/ExecutionEngine.h>
+#include <llvm-c/Target.h>
#ifdef __cplusplus
@@ -44,6 +45,12 @@ struct lp_generated_code;
extern void
gallivm_init_llvm_targets(void);
+extern LLVMTargetLibraryInfoRef
+gallivm_create_target_library_info(const char *triple);
+
+extern void
+gallivm_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info);
+
extern void
lp_set_target_options(void);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 8c39ab0afe9..a19be8a503a 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -2592,7 +2592,10 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
}
- /* XXX: for real msaa support, the w component would be the sample index. */
+ /*
+ * XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
+ * would be the sample index.
+ */
for (i = 0; i < dims; i++) {
coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
@@ -2742,6 +2745,7 @@ near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
opcode == TGSI_OPCODE_SAMPLE_D ||
opcode == TGSI_OPCODE_SAMPLE_I ||
+ opcode == TGSI_OPCODE_SAMPLE_I_MS ||
opcode == TGSI_OPCODE_SAMPLE_L ||
opcode == TGSI_OPCODE_SVIEWINFO ||
opcode == TGSI_OPCODE_CAL ||
@@ -3989,6 +3993,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
diff --git a/src/gallium/auxiliary/hud/font.c b/src/gallium/auxiliary/hud/font.c
index 60e8ae514a5..067de9e39c7 100644
--- a/src/gallium/auxiliary/hud/font.c
+++ b/src/gallium/auxiliary/hud/font.c
@@ -199,6 +199,7 @@ static const GLubyte Fixed8x13_Character_123[] = { 8, 0, 0, 0, 14, 16, 16,
static const GLubyte Fixed8x13_Character_124[] = { 8, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16, 16, 16, 0, 0};
static const GLubyte Fixed8x13_Character_125[] = { 8, 0, 0, 0,112, 8, 8, 16, 12, 16, 8, 8,112, 0, 0};
static const GLubyte Fixed8x13_Character_126[] = { 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 72, 84, 36, 0, 0};
+#if 0 /* currently unused */
static const GLubyte Fixed8x13_Character_127[] = { 9, 0, 0, 0, 0, 0, 0,170, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,170, 0, 0, 0, 0, 0};
static const GLubyte Fixed8x13_Character_128[] = { 9, 0, 0, 0, 0, 0, 0,170, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,170, 0, 0, 0, 0, 0};
static const GLubyte Fixed8x13_Character_129[] = { 9, 0, 0, 0, 0, 0, 0,170, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,170, 0, 0, 0, 0, 0};
@@ -232,6 +233,7 @@ static const GLubyte Fixed8x13_Character_156[] = { 9, 0, 0, 0, 0, 0, 0,17
static const GLubyte Fixed8x13_Character_157[] = { 9, 0, 0, 0, 0, 0, 0,170, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,170, 0, 0, 0, 0, 0};
static const GLubyte Fixed8x13_Character_158[] = { 9, 0, 0, 0, 0, 0, 0,170, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,170, 0, 0, 0, 0, 0};
static const GLubyte Fixed8x13_Character_159[] = { 9, 0, 0, 0, 0, 0, 0,170, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,170, 0, 0, 0, 0, 0};
+#endif
static const GLubyte Fixed8x13_Character_160[] = { 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
static const GLubyte Fixed8x13_Character_161[] = { 8, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16, 0, 16, 0, 0};
static const GLubyte Fixed8x13_Character_162[] = { 8, 0, 0, 0, 0, 16, 56, 84, 80, 80, 84, 56, 16, 0, 0};
diff --git a/src/gallium/auxiliary/hud/hud_context.c b/src/gallium/auxiliary/hud/hud_context.c
index 75afebe4919..fb998349a35 100644
--- a/src/gallium/auxiliary/hud/hud_context.c
+++ b/src/gallium/auxiliary/hud/hud_context.c
@@ -460,25 +460,25 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
hud->constants.two_div_fb_width = 2.0f / hud->fb_width;
hud->constants.two_div_fb_height = 2.0f / hud->fb_height;
- cso_save_framebuffer(cso);
- cso_save_sample_mask(cso);
- cso_save_min_samples(cso);
- cso_save_blend(cso);
- cso_save_depth_stencil_alpha(cso);
- cso_save_fragment_shader(cso);
- cso_save_fragment_sampler_views(cso);
- cso_save_fragment_samplers(cso);
- cso_save_rasterizer(cso);
- cso_save_viewport(cso);
- cso_save_stream_outputs(cso);
- cso_save_geometry_shader(cso);
- cso_save_tessctrl_shader(cso);
- cso_save_tesseval_shader(cso);
- cso_save_vertex_shader(cso);
- cso_save_vertex_elements(cso);
- cso_save_aux_vertex_buffer_slot(cso);
+ cso_save_state(cso, (CSO_BIT_FRAMEBUFFER |
+ CSO_BIT_SAMPLE_MASK |
+ CSO_BIT_MIN_SAMPLES |
+ CSO_BIT_BLEND |
+ CSO_BIT_DEPTH_STENCIL_ALPHA |
+ CSO_BIT_FRAGMENT_SHADER |
+ CSO_BIT_FRAGMENT_SAMPLER_VIEWS |
+ CSO_BIT_FRAGMENT_SAMPLERS |
+ CSO_BIT_RASTERIZER |
+ CSO_BIT_VIEWPORT |
+ CSO_BIT_STREAM_OUTPUTS |
+ CSO_BIT_GEOMETRY_SHADER |
+ CSO_BIT_TESSCTRL_SHADER |
+ CSO_BIT_TESSEVAL_SHADER |
+ CSO_BIT_VERTEX_SHADER |
+ CSO_BIT_VERTEX_ELEMENTS |
+ CSO_BIT_AUX_VERTEX_BUFFER_SLOT |
+ CSO_BIT_RENDER_CONDITION));
cso_save_constant_buffer_slot0(cso, PIPE_SHADER_VERTEX);
- cso_save_render_condition(cso);
/* set states */
memset(&surf_templ, 0, sizeof(surf_templ));
@@ -591,26 +591,8 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
hud_pane_draw_colored_objects(hud, pane);
}
- /* restore states */
- cso_restore_framebuffer(cso);
- cso_restore_sample_mask(cso);
- cso_restore_min_samples(cso);
- cso_restore_blend(cso);
- cso_restore_depth_stencil_alpha(cso);
- cso_restore_fragment_shader(cso);
- cso_restore_fragment_sampler_views(cso);
- cso_restore_fragment_samplers(cso);
- cso_restore_rasterizer(cso);
- cso_restore_viewport(cso);
- cso_restore_stream_outputs(cso);
- cso_restore_tessctrl_shader(cso);
- cso_restore_tesseval_shader(cso);
- cso_restore_geometry_shader(cso);
- cso_restore_vertex_shader(cso);
- cso_restore_vertex_elements(cso);
- cso_restore_aux_vertex_buffer_slot(cso);
+ cso_restore_state(cso);
cso_restore_constant_buffer_slot0(cso, PIPE_SHADER_VERTEX);
- cso_restore_render_condition(cso);
pipe_surface_reference(&surf, NULL);
}
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
index 14de61b163f..023a028a1d0 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
@@ -283,8 +283,8 @@ mm_bufmgr_create_from_buffer(struct pb_buffer *buffer,
return SUPER(mm);
failure:
-if(mm->heap)
- u_mmDestroy(mm->heap);
+ if(mm->heap)
+ u_mmDestroy(mm->heap);
if(mm->map)
pb_unmap(mm->buffer);
FREE(mm);
diff --git a/src/gallium/auxiliary/postprocess/pp_run.c b/src/gallium/auxiliary/postprocess/pp_run.c
index c6c7b88eea3..9dc8fb51ae2 100644
--- a/src/gallium/auxiliary/postprocess/pp_run.c
+++ b/src/gallium/auxiliary/postprocess/pp_run.c
@@ -115,27 +115,27 @@ pp_run(struct pp_queue_t *ppq, struct pipe_resource *in,
}
/* save state (restored below) */
- cso_save_blend(cso);
- cso_save_depth_stencil_alpha(cso);
- cso_save_fragment_shader(cso);
- cso_save_framebuffer(cso);
- cso_save_tessctrl_shader(cso);
- cso_save_tesseval_shader(cso);
- cso_save_geometry_shader(cso);
- cso_save_rasterizer(cso);
- cso_save_sample_mask(cso);
- cso_save_min_samples(cso);
- cso_save_fragment_samplers(cso);
- cso_save_fragment_sampler_views(cso);
- cso_save_stencil_ref(cso);
- cso_save_stream_outputs(cso);
- cso_save_vertex_elements(cso);
- cso_save_vertex_shader(cso);
- cso_save_viewport(cso);
- cso_save_aux_vertex_buffer_slot(cso);
+ cso_save_state(cso, (CSO_BIT_BLEND |
+ CSO_BIT_DEPTH_STENCIL_ALPHA |
+ CSO_BIT_FRAGMENT_SHADER |
+ CSO_BIT_FRAMEBUFFER |
+ CSO_BIT_TESSCTRL_SHADER |
+ CSO_BIT_TESSEVAL_SHADER |
+ CSO_BIT_GEOMETRY_SHADER |
+ CSO_BIT_RASTERIZER |
+ CSO_BIT_SAMPLE_MASK |
+ CSO_BIT_MIN_SAMPLES |
+ CSO_BIT_FRAGMENT_SAMPLERS |
+ CSO_BIT_FRAGMENT_SAMPLER_VIEWS |
+ CSO_BIT_STENCIL_REF |
+ CSO_BIT_STREAM_OUTPUTS |
+ CSO_BIT_VERTEX_ELEMENTS |
+ CSO_BIT_VERTEX_SHADER |
+ CSO_BIT_VIEWPORT |
+ CSO_BIT_AUX_VERTEX_BUFFER_SLOT |
+ CSO_BIT_RENDER_CONDITION));
cso_save_constant_buffer_slot0(cso, PIPE_SHADER_VERTEX);
cso_save_constant_buffer_slot0(cso, PIPE_SHADER_FRAGMENT);
- cso_save_render_condition(cso);
/* set default state */
cso_set_sample_mask(cso, ~0);
@@ -186,27 +186,9 @@ pp_run(struct pp_queue_t *ppq, struct pipe_resource *in,
}
/* restore state we changed */
- cso_restore_blend(cso);
- cso_restore_depth_stencil_alpha(cso);
- cso_restore_fragment_shader(cso);
- cso_restore_framebuffer(cso);
- cso_restore_tessctrl_shader(cso);
- cso_restore_tesseval_shader(cso);
- cso_restore_geometry_shader(cso);
- cso_restore_rasterizer(cso);
- cso_restore_sample_mask(cso);
- cso_restore_min_samples(cso);
- cso_restore_fragment_samplers(cso);
- cso_restore_fragment_sampler_views(cso);
- cso_restore_stencil_ref(cso);
- cso_restore_stream_outputs(cso);
- cso_restore_vertex_elements(cso);
- cso_restore_vertex_shader(cso);
- cso_restore_viewport(cso);
- cso_restore_aux_vertex_buffer_slot(cso);
+ cso_restore_state(cso);
cso_restore_constant_buffer_slot0(cso, PIPE_SHADER_VERTEX);
cso_restore_constant_buffer_slot0(cso, PIPE_SHADER_FRAGMENT);
- cso_restore_render_condition(cso);
pipe_resource_reference(&ppq->depth, NULL);
pipe_resource_reference(&refin, NULL);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index 83f50628b40..cfe9b92ee1b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -111,6 +111,7 @@ tgsi_default_declaration( void )
declaration.Local = 0;
declaration.Array = 0;
declaration.Atomic = 0;
+ declaration.Shared = 0;
declaration.Padding = 0;
return declaration;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 2ad29b9d49a..f232f3870d1 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -352,7 +352,7 @@ iter_declaration(
TXT(", ");
ENM(decl->Image.Resource, tgsi_texture_names);
TXT(", ");
- UID(decl->Image.Format);
+ TXT(util_format_name(decl->Image.Format));
if (decl->Image.Writable)
TXT(", WR");
if (decl->Image.Raw)
@@ -364,6 +364,11 @@ iter_declaration(
TXT(", ATOMIC");
}
+ if (decl->Declaration.File == TGSI_FILE_MEMORY) {
+ if (decl->Declaration.Shared)
+ TXT(", SHARED");
+ }
+
if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
TXT(", ");
ENM(decl->SamplerView.Resource, tgsi_texture_names);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index d898fd66f48..126259fc0f8 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -2300,7 +2300,8 @@ exec_txf(struct tgsi_exec_machine *mach,
IFETCH(&r[3], 0, TGSI_CHAN_W);
- if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I) {
+ if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I ||
+ inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) {
target = mach->SamplerViews[unit].Resource;
}
else {
@@ -2342,7 +2343,8 @@ exec_txf(struct tgsi_exec_machine *mach,
r[3].f[j] = rgba[3][j];
}
- if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I) {
+ if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I ||
+ inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) {
unsigned char swizzles[4];
swizzles[0] = inst->Src[1].Register.SwizzleX;
swizzles[1] = inst->Src[1].Register.SwizzleY;
@@ -4967,7 +4969,7 @@ exec_instruction(
break;
case TGSI_OPCODE_SAMPLE_I_MS:
- assert(0);
+ exec_txf(mach, inst);
break;
case TGSI_OPCODE_SAMPLE:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 26fec8e2142..12a68759ce5 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -465,6 +465,8 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param)
return PIPE_MAX_SHADER_SAMPLER_VIEWS;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ return 1 << PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
return 1;
case PIPE_SHADER_CAP_DOUBLES:
@@ -474,6 +476,7 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param)
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 489423d7f12..4f85d2fda67 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -247,7 +247,14 @@ scan_declaration(struct tgsi_shader_info *info,
info->input_interpolate[reg] = (ubyte)fulldecl->Interp.Interpolate;
info->input_interpolate_loc[reg] = (ubyte)fulldecl->Interp.Location;
info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Interp.CylindricalWrap;
- info->num_inputs++;
+
+ /* Vertex shaders can have inputs with holes between them. */
+ if (info->processor == TGSI_PROCESSOR_VERTEX)
+ info->num_inputs = MAX2(info->num_inputs, reg + 1);
+ else {
+ info->num_inputs++;
+ assert(reg < info->num_inputs);
+ }
/* Only interpolated varyings. Don't include POSITION.
* Don't include integer varyings, because they are not
@@ -341,6 +348,7 @@ scan_declaration(struct tgsi_shader_info *info,
info->output_semantic_name[reg] = (ubyte) semName;
info->output_semantic_index[reg] = (ubyte) semIndex;
info->num_outputs++;
+ assert(reg < info->num_outputs);
if (semName == TGSI_SEMANTIC_COLOR)
info->colors_written |= 1 << semIndex;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c b/src/gallium/auxiliary/tgsi/tgsi_strings.c
index f2d70d49839..b15ae69cf7a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_strings.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c
@@ -57,6 +57,7 @@ static const char *tgsi_file_names[] =
"IMAGE",
"SVIEW",
"BUFFER",
+ "MEMORY",
};
const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] =
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index 97b1869a66f..91baa01ad8b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -1290,8 +1290,6 @@ static boolean parse_declaration( struct translate_ctx *ctx )
return FALSE;
}
- /* XXX format */
-
cur2 = cur;
eat_opt_white(&cur2);
while (*cur2 == ',') {
@@ -1304,7 +1302,16 @@ static boolean parse_declaration( struct translate_ctx *ctx )
decl.Image.Writable = 1;
} else {
- break;
+ for (i = 0; i < PIPE_FORMAT_COUNT; i++) {
+ const struct util_format_description *desc =
+ util_format_description(i);
+ if (desc && str_match_nocase_whole(&cur2, desc->name)) {
+ decl.Image.Format = i;
+ break;
+ }
+ }
+ if (i == PIPE_FORMAT_COUNT)
+ break;
}
cur = cur2;
eat_opt_white(&cur2);
@@ -1381,6 +1388,9 @@ static boolean parse_declaration( struct translate_ctx *ctx )
if (str_match_nocase_whole(&cur, "ATOMIC")) {
decl.Declaration.Atomic = 1;
ctx->cur = cur;
+ } else if (str_match_nocase_whole(&cur, "SHARED")) {
+ decl.Declaration.Shared = 1;
+ ctx->cur = cur;
}
} else {
if (str_match_nocase_whole(&cur, "LOCAL")) {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index 9654ac52bf2..e1a72786476 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -189,6 +189,8 @@ struct ureg_program
unsigned nr_instructions;
struct ureg_tokens domain[2];
+
+ bool use_shared_memory;
};
static union tgsi_any_token error_tokens[32];
@@ -727,6 +729,16 @@ struct ureg_src ureg_DECL_buffer(struct ureg_program *ureg, unsigned nr,
return reg;
}
+/* Allocate a shared memory area.
+ */
+struct ureg_src ureg_DECL_shared_memory(struct ureg_program *ureg)
+{
+ struct ureg_src reg = ureg_src_register(TGSI_FILE_MEMORY, 0);
+
+ ureg->use_shared_memory = true;
+ return reg;
+}
+
static int
match_or_expand_immediate64( const unsigned *v,
int type,
@@ -1654,6 +1666,23 @@ emit_decl_buffer(struct ureg_program *ureg,
}
static void
+emit_decl_shared_memory(struct ureg_program *ureg)
+{
+ union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2);
+
+ out[0].value = 0;
+ out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
+ out[0].decl.NrTokens = 2;
+ out[0].decl.File = TGSI_FILE_MEMORY;
+ out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
+ out[0].decl.Shared = true;
+
+ out[1].value = 0;
+ out[1].decl_range.First = 0;
+ out[1].decl_range.Last = 0;
+}
+
+static void
emit_immediate( struct ureg_program *ureg,
const unsigned *v,
unsigned type )
@@ -1825,6 +1854,9 @@ static void emit_decls( struct ureg_program *ureg )
emit_decl_buffer(ureg, ureg->buffer[i].index, ureg->buffer[i].atomic);
}
+ if (ureg->use_shared_memory)
+ emit_decl_shared_memory(ureg);
+
if (ureg->const_decls.nr_constant_ranges) {
for (i = 0; i < ureg->const_decls.nr_constant_ranges; i++) {
emit_decl_range(ureg,
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
index 86e58a91343..6a3b5ddf017 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
@@ -337,6 +337,9 @@ ureg_DECL_image(struct ureg_program *ureg,
struct ureg_src
ureg_DECL_buffer(struct ureg_program *ureg, unsigned nr, bool atomic);
+struct ureg_src
+ureg_DECL_shared_memory(struct ureg_program *ureg);
+
static inline struct ureg_src
ureg_imm4f( struct ureg_program *ureg,
float a, float b,
diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index 9737c940936..22c40d1382d 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -541,23 +541,23 @@ util_blit_pixels_tex(struct blit_state *ctx,
PIPE_BIND_RENDER_TARGET));
/* save state (restored below) */
- cso_save_blend(ctx->cso);
- cso_save_depth_stencil_alpha(ctx->cso);
- cso_save_rasterizer(ctx->cso);
- cso_save_sample_mask(ctx->cso);
- cso_save_min_samples(ctx->cso);
- cso_save_fragment_samplers(ctx->cso);
- cso_save_fragment_sampler_views(ctx->cso);
- cso_save_stream_outputs(ctx->cso);
- cso_save_viewport(ctx->cso);
- cso_save_framebuffer(ctx->cso);
- cso_save_fragment_shader(ctx->cso);
- cso_save_vertex_shader(ctx->cso);
- cso_save_tessctrl_shader(ctx->cso);
- cso_save_tesseval_shader(ctx->cso);
- cso_save_geometry_shader(ctx->cso);
- cso_save_vertex_elements(ctx->cso);
- cso_save_aux_vertex_buffer_slot(ctx->cso);
+ cso_save_state(ctx->cso, (CSO_BIT_BLEND |
+ CSO_BIT_DEPTH_STENCIL_ALPHA |
+ CSO_BIT_RASTERIZER |
+ CSO_BIT_SAMPLE_MASK |
+ CSO_BIT_MIN_SAMPLES |
+ CSO_BIT_FRAGMENT_SAMPLERS |
+ CSO_BIT_FRAGMENT_SAMPLER_VIEWS |
+ CSO_BIT_STREAM_OUTPUTS |
+ CSO_BIT_VIEWPORT |
+ CSO_BIT_FRAMEBUFFER |
+ CSO_BIT_FRAGMENT_SHADER |
+ CSO_BIT_VERTEX_SHADER |
+ CSO_BIT_TESSCTRL_SHADER |
+ CSO_BIT_TESSEVAL_SHADER |
+ CSO_BIT_GEOMETRY_SHADER |
+ CSO_BIT_VERTEX_ELEMENTS |
+ CSO_BIT_AUX_VERTEX_BUFFER_SLOT));
/* set misc state we care about */
cso_set_blend(ctx->cso, &ctx->blend_write_color);
@@ -625,21 +625,5 @@ util_blit_pixels_tex(struct blit_state *ctx,
2); /* attribs/vert */
/* restore state we changed */
- cso_restore_blend(ctx->cso);
- cso_restore_depth_stencil_alpha(ctx->cso);
- cso_restore_rasterizer(ctx->cso);
- cso_restore_sample_mask(ctx->cso);
- cso_restore_min_samples(ctx->cso);
- cso_restore_fragment_samplers(ctx->cso);
- cso_restore_fragment_sampler_views(ctx->cso);
- cso_restore_viewport(ctx->cso);
- cso_restore_framebuffer(ctx->cso);
- cso_restore_fragment_shader(ctx->cso);
- cso_restore_vertex_shader(ctx->cso);
- cso_restore_tessctrl_shader(ctx->cso);
- cso_restore_tesseval_shader(ctx->cso);
- cso_restore_geometry_shader(ctx->cso);
- cso_restore_vertex_elements(ctx->cso);
- cso_restore_aux_vertex_buffer_slot(ctx->cso);
- cso_restore_stream_outputs(ctx->cso);
+ cso_restore_state(ctx->cso);
}
diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h
index 57a3b0b6082..d0812039292 100644
--- a/src/gallium/auxiliary/util/u_inlines.h
+++ b/src/gallium/auxiliary/util/u_inlines.h
@@ -174,17 +174,6 @@ pipe_sampler_view_release(struct pipe_context *ctx,
}
static inline void
-pipe_image_view_reference(struct pipe_image_view **ptr, struct pipe_image_view *view)
-{
- struct pipe_image_view *old_view = *ptr;
-
- if (pipe_reference_described(&(*ptr)->reference, &view->reference,
- (debug_reference_descriptor)debug_describe_image_view))
- old_view->context->image_view_destroy(old_view->context, old_view);
- *ptr = view;
-}
-
-static inline void
pipe_so_target_reference(struct pipe_stream_output_target **ptr,
struct pipe_stream_output_target *target)
{
diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst
index 3324bcca6f4..b5d691f4f7e 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -415,6 +415,9 @@ to be 0.
(also used to implement atomic counters). Having this be non-0 also
implies support for the ``LOAD``, ``STORE``, and ``ATOM*`` TGSI
opcodes.
+* ``PIPE_SHADER_CAP_SUPPORTED_IRS``: Supported representations of the
+ program. It should be a mask of ``pipe_shader_ir`` bits.
+* ``PIPE_SHADER_CAP_MAX_SHADER_IMAGES``: Maximum number of image units.
.. _pipe_compute_cap:
diff --git a/src/gallium/drivers/ddebug/dd_context.c b/src/gallium/drivers/ddebug/dd_context.c
index 3ae7764ff3f..9dfaa0af289 100644
--- a/src/gallium/drivers/ddebug/dd_context.c
+++ b/src/gallium/drivers/ddebug/dd_context.c
@@ -415,30 +415,6 @@ dd_context_sampler_view_destroy(struct pipe_context *_pipe,
pipe->sampler_view_destroy(pipe, view);
}
-static struct pipe_image_view *
-dd_context_create_image_view(struct pipe_context *_pipe,
- struct pipe_resource *resource,
- const struct pipe_image_view *templ)
-{
- struct pipe_context *pipe = dd_context(_pipe)->pipe;
- struct pipe_image_view *view =
- pipe->create_image_view(pipe, resource, templ);
-
- if (!view)
- return NULL;
- view->context = _pipe;
- return view;
-}
-
-static void
-dd_context_image_view_destroy(struct pipe_context *_pipe,
- struct pipe_image_view *view)
-{
- struct pipe_context *pipe = dd_context(_pipe)->pipe;
-
- pipe->image_view_destroy(pipe, view);
-}
-
static struct pipe_stream_output_target *
dd_context_create_stream_output_target(struct pipe_context *_pipe,
struct pipe_resource *res,
@@ -486,7 +462,7 @@ dd_context_set_sampler_views(struct pipe_context *_pipe, unsigned shader,
static void
dd_context_set_shader_images(struct pipe_context *_pipe, unsigned shader,
unsigned start, unsigned num,
- struct pipe_image_view **views)
+ struct pipe_image_view *views)
{
struct dd_context *dctx = dd_context(_pipe);
struct pipe_context *pipe = dctx->pipe;
@@ -744,8 +720,6 @@ dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe)
CTX_INIT(sampler_view_destroy);
CTX_INIT(create_surface);
CTX_INIT(surface_destroy);
- CTX_INIT(create_image_view);
- CTX_INIT(image_view_destroy);
CTX_INIT(transfer_map);
CTX_INIT(transfer_flush_region);
CTX_INIT(transfer_unmap);
diff --git a/src/gallium/drivers/ddebug/dd_pipe.h b/src/gallium/drivers/ddebug/dd_pipe.h
index 80098dcb644..c9bbd569abe 100644
--- a/src/gallium/drivers/ddebug/dd_pipe.h
+++ b/src/gallium/drivers/ddebug/dd_pipe.h
@@ -94,7 +94,7 @@ struct dd_context
struct pipe_constant_buffer constant_buffers[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS];
struct pipe_sampler_view *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
struct dd_state *sampler_states[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
- struct pipe_image_view *shader_images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES];
+ struct pipe_image_view shader_images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES];
struct pipe_shader_buffer shader_buffers[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_BUFFERS];
struct dd_state *velems;
diff --git a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
index d23111352b7..71ee55054d3 100644
--- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
+++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
@@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
Copyright (C) 2013-2015 by the following authors:
- Rob Clark <[email protected]> (robclark)
+- Ilia Mirkin <[email protected]> (imirkin)
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
index c4f253b836c..c6286a1f290 100644
--- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
+++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
@@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
-Copyright (C) 2013-2015 by the following authors:
+Copyright (C) 2013-2016 by the following authors:
- Rob Clark <[email protected]> (robclark)
+- Ilia Mirkin <[email protected]> (imirkin)
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@@ -255,11 +256,273 @@ enum a3xx_color_fmt {
RB_R32G32B32A32_UINT = 59,
};
+enum a3xx_cp_perfcounter_select {
+ CP_ALWAYS_COUNT = 0,
+ CP_AHB_PFPTRANS_WAIT = 3,
+ CP_AHB_NRTTRANS_WAIT = 6,
+ CP_CSF_NRT_READ_WAIT = 8,
+ CP_CSF_I1_FIFO_FULL = 9,
+ CP_CSF_I2_FIFO_FULL = 10,
+ CP_CSF_ST_FIFO_FULL = 11,
+ CP_RESERVED_12 = 12,
+ CP_CSF_RING_ROQ_FULL = 13,
+ CP_CSF_I1_ROQ_FULL = 14,
+ CP_CSF_I2_ROQ_FULL = 15,
+ CP_CSF_ST_ROQ_FULL = 16,
+ CP_RESERVED_17 = 17,
+ CP_MIU_TAG_MEM_FULL = 18,
+ CP_MIU_NRT_WRITE_STALLED = 22,
+ CP_MIU_NRT_READ_STALLED = 23,
+ CP_ME_REGS_RB_DONE_FIFO_FULL = 26,
+ CP_ME_REGS_VS_EVENT_FIFO_FULL = 27,
+ CP_ME_REGS_PS_EVENT_FIFO_FULL = 28,
+ CP_ME_REGS_CF_EVENT_FIFO_FULL = 29,
+ CP_ME_MICRO_RB_STARVED = 30,
+ CP_AHB_RBBM_DWORD_SENT = 40,
+ CP_ME_BUSY_CLOCKS = 41,
+ CP_ME_WAIT_CONTEXT_AVAIL = 42,
+ CP_PFP_TYPE0_PACKET = 43,
+ CP_PFP_TYPE3_PACKET = 44,
+ CP_CSF_RB_WPTR_NEQ_RPTR = 45,
+ CP_CSF_I1_SIZE_NEQ_ZERO = 46,
+ CP_CSF_I2_SIZE_NEQ_ZERO = 47,
+ CP_CSF_RBI1I2_FETCHING = 48,
+};
+
+enum a3xx_gras_tse_perfcounter_select {
+ GRAS_TSEPERF_INPUT_PRIM = 0,
+ GRAS_TSEPERF_INPUT_NULL_PRIM = 1,
+ GRAS_TSEPERF_TRIVAL_REJ_PRIM = 2,
+ GRAS_TSEPERF_CLIPPED_PRIM = 3,
+ GRAS_TSEPERF_NEW_PRIM = 4,
+ GRAS_TSEPERF_ZERO_AREA_PRIM = 5,
+ GRAS_TSEPERF_FACENESS_CULLED_PRIM = 6,
+ GRAS_TSEPERF_ZERO_PIXEL_PRIM = 7,
+ GRAS_TSEPERF_OUTPUT_NULL_PRIM = 8,
+ GRAS_TSEPERF_OUTPUT_VISIBLE_PRIM = 9,
+ GRAS_TSEPERF_PRE_CLIP_PRIM = 10,
+ GRAS_TSEPERF_POST_CLIP_PRIM = 11,
+ GRAS_TSEPERF_WORKING_CYCLES = 12,
+ GRAS_TSEPERF_PC_STARVE = 13,
+ GRAS_TSERASPERF_STALL = 14,
+};
+
+enum a3xx_gras_ras_perfcounter_select {
+ GRAS_RASPERF_16X16_TILES = 0,
+ GRAS_RASPERF_8X8_TILES = 1,
+ GRAS_RASPERF_4X4_TILES = 2,
+ GRAS_RASPERF_WORKING_CYCLES = 3,
+ GRAS_RASPERF_STALL_CYCLES_BY_RB = 4,
+ GRAS_RASPERF_STALL_CYCLES_BY_VSC = 5,
+ GRAS_RASPERF_STARVE_CYCLES_BY_TSE = 6,
+};
+
+enum a3xx_hlsq_perfcounter_select {
+ HLSQ_PERF_SP_VS_CONSTANT = 0,
+ HLSQ_PERF_SP_VS_INSTRUCTIONS = 1,
+ HLSQ_PERF_SP_FS_CONSTANT = 2,
+ HLSQ_PERF_SP_FS_INSTRUCTIONS = 3,
+ HLSQ_PERF_TP_STATE = 4,
+ HLSQ_PERF_QUADS = 5,
+ HLSQ_PERF_PIXELS = 6,
+ HLSQ_PERF_VERTICES = 7,
+ HLSQ_PERF_FS8_THREADS = 8,
+ HLSQ_PERF_FS16_THREADS = 9,
+ HLSQ_PERF_FS32_THREADS = 10,
+ HLSQ_PERF_VS8_THREADS = 11,
+ HLSQ_PERF_VS16_THREADS = 12,
+ HLSQ_PERF_SP_VS_DATA_BYTES = 13,
+ HLSQ_PERF_SP_FS_DATA_BYTES = 14,
+ HLSQ_PERF_ACTIVE_CYCLES = 15,
+ HLSQ_PERF_STALL_CYCLES_SP_STATE = 16,
+ HLSQ_PERF_STALL_CYCLES_SP_VS = 17,
+ HLSQ_PERF_STALL_CYCLES_SP_FS = 18,
+ HLSQ_PERF_STALL_CYCLES_UCHE = 19,
+ HLSQ_PERF_RBBM_LOAD_CYCLES = 20,
+ HLSQ_PERF_DI_TO_VS_START_SP0 = 21,
+ HLSQ_PERF_DI_TO_FS_START_SP0 = 22,
+ HLSQ_PERF_VS_START_TO_DONE_SP0 = 23,
+ HLSQ_PERF_FS_START_TO_DONE_SP0 = 24,
+ HLSQ_PERF_SP_STATE_COPY_CYCLES_VS = 25,
+ HLSQ_PERF_SP_STATE_COPY_CYCLES_FS = 26,
+ HLSQ_PERF_UCHE_LATENCY_CYCLES = 27,
+ HLSQ_PERF_UCHE_LATENCY_COUNT = 28,
+};
+
+enum a3xx_pc_perfcounter_select {
+ PC_PCPERF_VISIBILITY_STREAMS = 0,
+ PC_PCPERF_TOTAL_INSTANCES = 1,
+ PC_PCPERF_PRIMITIVES_PC_VPC = 2,
+ PC_PCPERF_PRIMITIVES_KILLED_BY_VS = 3,
+ PC_PCPERF_PRIMITIVES_VISIBLE_BY_VS = 4,
+ PC_PCPERF_DRAWCALLS_KILLED_BY_VS = 5,
+ PC_PCPERF_DRAWCALLS_VISIBLE_BY_VS = 6,
+ PC_PCPERF_VERTICES_TO_VFD = 7,
+ PC_PCPERF_REUSED_VERTICES = 8,
+ PC_PCPERF_CYCLES_STALLED_BY_VFD = 9,
+ PC_PCPERF_CYCLES_STALLED_BY_TSE = 10,
+ PC_PCPERF_CYCLES_STALLED_BY_VBIF = 11,
+ PC_PCPERF_CYCLES_IS_WORKING = 12,
+};
+
+enum a3xx_rb_perfcounter_select {
+ RB_RBPERF_ACTIVE_CYCLES_ANY = 0,
+ RB_RBPERF_ACTIVE_CYCLES_ALL = 1,
+ RB_RBPERF_STARVE_CYCLES_BY_SP = 2,
+ RB_RBPERF_STARVE_CYCLES_BY_RAS = 3,
+ RB_RBPERF_STARVE_CYCLES_BY_MARB = 4,
+ RB_RBPERF_STALL_CYCLES_BY_MARB = 5,
+ RB_RBPERF_STALL_CYCLES_BY_HLSQ = 6,
+ RB_RBPERF_RB_MARB_DATA = 7,
+ RB_RBPERF_SP_RB_QUAD = 8,
+ RB_RBPERF_RAS_EARLY_Z_QUADS = 9,
+ RB_RBPERF_GMEM_CH0_READ = 10,
+ RB_RBPERF_GMEM_CH1_READ = 11,
+ RB_RBPERF_GMEM_CH0_WRITE = 12,
+ RB_RBPERF_GMEM_CH1_WRITE = 13,
+ RB_RBPERF_CP_CONTEXT_DONE = 14,
+ RB_RBPERF_CP_CACHE_FLUSH = 15,
+ RB_RBPERF_CP_ZPASS_DONE = 16,
+};
+
+enum a3xx_rbbm_perfcounter_select {
+ RBBM_ALAWYS_ON = 0,
+ RBBM_VBIF_BUSY = 1,
+ RBBM_TSE_BUSY = 2,
+ RBBM_RAS_BUSY = 3,
+ RBBM_PC_DCALL_BUSY = 4,
+ RBBM_PC_VSD_BUSY = 5,
+ RBBM_VFD_BUSY = 6,
+ RBBM_VPC_BUSY = 7,
+ RBBM_UCHE_BUSY = 8,
+ RBBM_VSC_BUSY = 9,
+ RBBM_HLSQ_BUSY = 10,
+ RBBM_ANY_RB_BUSY = 11,
+ RBBM_ANY_TEX_BUSY = 12,
+ RBBM_ANY_USP_BUSY = 13,
+ RBBM_ANY_MARB_BUSY = 14,
+ RBBM_ANY_ARB_BUSY = 15,
+ RBBM_AHB_STATUS_BUSY = 16,
+ RBBM_AHB_STATUS_STALLED = 17,
+ RBBM_AHB_STATUS_TXFR = 18,
+ RBBM_AHB_STATUS_TXFR_SPLIT = 19,
+ RBBM_AHB_STATUS_TXFR_ERROR = 20,
+ RBBM_AHB_STATUS_LONG_STALL = 21,
+ RBBM_RBBM_STATUS_MASKED = 22,
+};
+
enum a3xx_sp_perfcounter_select {
+ SP_LM_LOAD_INSTRUCTIONS = 0,
+ SP_LM_STORE_INSTRUCTIONS = 1,
+ SP_LM_ATOMICS = 2,
+ SP_UCHE_LOAD_INSTRUCTIONS = 3,
+ SP_UCHE_STORE_INSTRUCTIONS = 4,
+ SP_UCHE_ATOMICS = 5,
+ SP_VS_TEX_INSTRUCTIONS = 6,
+ SP_VS_CFLOW_INSTRUCTIONS = 7,
+ SP_VS_EFU_INSTRUCTIONS = 8,
+ SP_VS_FULL_ALU_INSTRUCTIONS = 9,
+ SP_VS_HALF_ALU_INSTRUCTIONS = 10,
+ SP_FS_TEX_INSTRUCTIONS = 11,
SP_FS_CFLOW_INSTRUCTIONS = 12,
+ SP_FS_EFU_INSTRUCTIONS = 13,
SP_FS_FULL_ALU_INSTRUCTIONS = 14,
- SP0_ICL1_MISSES = 26,
+ SP_FS_HALF_ALU_INSTRUCTIONS = 15,
+ SP_FS_BARY_INSTRUCTIONS = 16,
+ SP_VS_INSTRUCTIONS = 17,
+ SP_FS_INSTRUCTIONS = 18,
+ SP_ADDR_LOCK_COUNT = 19,
+ SP_UCHE_READ_TRANS = 20,
+ SP_UCHE_WRITE_TRANS = 21,
+ SP_EXPORT_VPC_TRANS = 22,
+ SP_EXPORT_RB_TRANS = 23,
+ SP_PIXELS_KILLED = 24,
+ SP_ICL1_REQUESTS = 25,
+ SP_ICL1_MISSES = 26,
+ SP_ICL0_REQUESTS = 27,
+ SP_ICL0_MISSES = 28,
SP_ALU_ACTIVE_CYCLES = 29,
+ SP_EFU_ACTIVE_CYCLES = 30,
+ SP_STALL_CYCLES_BY_VPC = 31,
+ SP_STALL_CYCLES_BY_TP = 32,
+ SP_STALL_CYCLES_BY_UCHE = 33,
+ SP_STALL_CYCLES_BY_RB = 34,
+ SP_ACTIVE_CYCLES_ANY = 35,
+ SP_ACTIVE_CYCLES_ALL = 36,
+};
+
+enum a3xx_tp_perfcounter_select {
+ TPL1_TPPERF_L1_REQUESTS = 0,
+ TPL1_TPPERF_TP0_L1_REQUESTS = 1,
+ TPL1_TPPERF_TP0_L1_MISSES = 2,
+ TPL1_TPPERF_TP1_L1_REQUESTS = 3,
+ TPL1_TPPERF_TP1_L1_MISSES = 4,
+ TPL1_TPPERF_TP2_L1_REQUESTS = 5,
+ TPL1_TPPERF_TP2_L1_MISSES = 6,
+ TPL1_TPPERF_TP3_L1_REQUESTS = 7,
+ TPL1_TPPERF_TP3_L1_MISSES = 8,
+ TPL1_TPPERF_OUTPUT_TEXELS_POINT = 9,
+ TPL1_TPPERF_OUTPUT_TEXELS_BILINEAR = 10,
+ TPL1_TPPERF_OUTPUT_TEXELS_MIP = 11,
+ TPL1_TPPERF_OUTPUT_TEXELS_ANISO = 12,
+ TPL1_TPPERF_BILINEAR_OPS = 13,
+ TPL1_TPPERF_QUADSQUADS_OFFSET = 14,
+ TPL1_TPPERF_QUADQUADS_SHADOW = 15,
+ TPL1_TPPERF_QUADS_ARRAY = 16,
+ TPL1_TPPERF_QUADS_PROJECTION = 17,
+ TPL1_TPPERF_QUADS_GRADIENT = 18,
+ TPL1_TPPERF_QUADS_1D2D = 19,
+ TPL1_TPPERF_QUADS_3DCUBE = 20,
+ TPL1_TPPERF_ZERO_LOD = 21,
+ TPL1_TPPERF_OUTPUT_TEXELS = 22,
+ TPL1_TPPERF_ACTIVE_CYCLES_ANY = 23,
+ TPL1_TPPERF_ACTIVE_CYCLES_ALL = 24,
+ TPL1_TPPERF_STALL_CYCLES_BY_ARB = 25,
+ TPL1_TPPERF_LATENCY = 26,
+ TPL1_TPPERF_LATENCY_TRANS = 27,
+};
+
+enum a3xx_vfd_perfcounter_select {
+ VFD_PERF_UCHE_BYTE_FETCHED = 0,
+ VFD_PERF_UCHE_TRANS = 1,
+ VFD_PERF_VPC_BYPASS_COMPONENTS = 2,
+ VFD_PERF_FETCH_INSTRUCTIONS = 3,
+ VFD_PERF_DECODE_INSTRUCTIONS = 4,
+ VFD_PERF_ACTIVE_CYCLES = 5,
+ VFD_PERF_STALL_CYCLES_UCHE = 6,
+ VFD_PERF_STALL_CYCLES_HLSQ = 7,
+ VFD_PERF_STALL_CYCLES_VPC_BYPASS = 8,
+ VFD_PERF_STALL_CYCLES_VPC_ALLOC = 9,
+};
+
+enum a3xx_vpc_perfcounter_select {
+ VPC_PERF_SP_LM_PRIMITIVES = 0,
+ VPC_PERF_COMPONENTS_FROM_SP = 1,
+ VPC_PERF_SP_LM_COMPONENTS = 2,
+ VPC_PERF_ACTIVE_CYCLES = 3,
+ VPC_PERF_STALL_CYCLES_LM = 4,
+ VPC_PERF_STALL_CYCLES_RAS = 5,
+};
+
+enum a3xx_uche_perfcounter_select {
+ UCHE_UCHEPERF_VBIF_READ_BEATS_TP = 0,
+ UCHE_UCHEPERF_VBIF_READ_BEATS_VFD = 1,
+ UCHE_UCHEPERF_VBIF_READ_BEATS_HLSQ = 2,
+ UCHE_UCHEPERF_VBIF_READ_BEATS_MARB = 3,
+ UCHE_UCHEPERF_VBIF_READ_BEATS_SP = 4,
+ UCHE_UCHEPERF_READ_REQUESTS_TP = 8,
+ UCHE_UCHEPERF_READ_REQUESTS_VFD = 9,
+ UCHE_UCHEPERF_READ_REQUESTS_HLSQ = 10,
+ UCHE_UCHEPERF_READ_REQUESTS_MARB = 11,
+ UCHE_UCHEPERF_READ_REQUESTS_SP = 12,
+ UCHE_UCHEPERF_WRITE_REQUESTS_MARB = 13,
+ UCHE_UCHEPERF_WRITE_REQUESTS_SP = 14,
+ UCHE_UCHEPERF_TAG_CHECK_FAILS = 15,
+ UCHE_UCHEPERF_EVICTS = 16,
+ UCHE_UCHEPERF_FLUSHES = 17,
+ UCHE_UCHEPERF_VBIF_LATENCY_CYCLES = 18,
+ UCHE_UCHEPERF_VBIF_LATENCY_SAMPLES = 19,
+ UCHE_UCHEPERF_ACTIVE_CYCLES = 20,
};
enum a3xx_rb_blend_opcode {
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 811f58bbba2..8c37992e17d 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -33,6 +33,7 @@
#include "util/u_format.h"
#include "freedreno_resource.h"
+#include "freedreno_query_hw.h"
#include "fd3_emit.h"
#include "fd3_blend.h"
@@ -888,6 +889,8 @@ fd3_emit_restore(struct fd_context *ctx)
fd_wfi(ctx, ring);
+ fd_hw_query_enable(ctx, ring);
+
ctx->needs_rb_fbd = true;
}
diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index e8df429441e..d6fd1bb583e 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
-Copyright (C) 2013-2015 by the following authors:
+Copyright (C) 2013-2016 by the following authors:
- Rob Clark <[email protected]> (robclark)
+- Ilia Mirkin <[email protected]> (imirkin)
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@@ -271,6 +272,545 @@ enum a4xx_tess_spacing {
EVEN_SPACING = 3,
};
+enum a4xx_ccu_perfcounter_select {
+ CCU_BUSY_CYCLES = 0,
+ CCU_RB_DEPTH_RETURN_STALL = 2,
+ CCU_RB_COLOR_RETURN_STALL = 3,
+ CCU_DEPTH_BLOCKS = 6,
+ CCU_COLOR_BLOCKS = 7,
+ CCU_DEPTH_BLOCK_HIT = 8,
+ CCU_COLOR_BLOCK_HIT = 9,
+ CCU_DEPTH_FLAG1_COUNT = 10,
+ CCU_DEPTH_FLAG2_COUNT = 11,
+ CCU_DEPTH_FLAG3_COUNT = 12,
+ CCU_DEPTH_FLAG4_COUNT = 13,
+ CCU_COLOR_FLAG1_COUNT = 14,
+ CCU_COLOR_FLAG2_COUNT = 15,
+ CCU_COLOR_FLAG3_COUNT = 16,
+ CCU_COLOR_FLAG4_COUNT = 17,
+ CCU_PARTIAL_BLOCK_READ = 18,
+};
+
+enum a4xx_cp_perfcounter_select {
+ CP_ALWAYS_COUNT = 0,
+ CP_BUSY = 1,
+ CP_PFP_IDLE = 2,
+ CP_PFP_BUSY_WORKING = 3,
+ CP_PFP_STALL_CYCLES_ANY = 4,
+ CP_PFP_STARVE_CYCLES_ANY = 5,
+ CP_PFP_STARVED_PER_LOAD_ADDR = 6,
+ CP_PFP_STALLED_PER_STORE_ADDR = 7,
+ CP_PFP_PC_PROFILE = 8,
+ CP_PFP_MATCH_PM4_PKT_PROFILE = 9,
+ CP_PFP_COND_INDIRECT_DISCARDED = 10,
+ CP_LONG_RESUMPTIONS = 11,
+ CP_RESUME_CYCLES = 12,
+ CP_RESUME_TO_BOUNDARY_CYCLES = 13,
+ CP_LONG_PREEMPTIONS = 14,
+ CP_PREEMPT_CYCLES = 15,
+ CP_PREEMPT_TO_BOUNDARY_CYCLES = 16,
+ CP_ME_FIFO_EMPTY_PFP_IDLE = 17,
+ CP_ME_FIFO_EMPTY_PFP_BUSY = 18,
+ CP_ME_FIFO_NOT_EMPTY_NOT_FULL = 19,
+ CP_ME_FIFO_FULL_ME_BUSY = 20,
+ CP_ME_FIFO_FULL_ME_NON_WORKING = 21,
+ CP_ME_WAITING_FOR_PACKETS = 22,
+ CP_ME_BUSY_WORKING = 23,
+ CP_ME_STARVE_CYCLES_ANY = 24,
+ CP_ME_STARVE_CYCLES_PER_PROFILE = 25,
+ CP_ME_STALL_CYCLES_PER_PROFILE = 26,
+ CP_ME_PC_PROFILE = 27,
+ CP_RCIU_FIFO_EMPTY = 28,
+ CP_RCIU_FIFO_NOT_EMPTY_NOT_FULL = 29,
+ CP_RCIU_FIFO_FULL = 30,
+ CP_RCIU_FIFO_FULL_NO_CONTEXT = 31,
+ CP_RCIU_FIFO_FULL_AHB_MASTER = 32,
+ CP_RCIU_FIFO_FULL_OTHER = 33,
+ CP_AHB_IDLE = 34,
+ CP_AHB_STALL_ON_GRANT_NO_SPLIT = 35,
+ CP_AHB_STALL_ON_GRANT_SPLIT = 36,
+ CP_AHB_STALL_ON_GRANT_SPLIT_PROFILE = 37,
+ CP_AHB_BUSY_WORKING = 38,
+ CP_AHB_BUSY_STALL_ON_HRDY = 39,
+ CP_AHB_BUSY_STALL_ON_HRDY_PROFILE = 40,
+};
+
+enum a4xx_gras_ras_perfcounter_select {
+ RAS_SUPER_TILES = 0,
+ RAS_8X8_TILES = 1,
+ RAS_4X4_TILES = 2,
+ RAS_BUSY_CYCLES = 3,
+ RAS_STALL_CYCLES_BY_RB = 4,
+ RAS_STALL_CYCLES_BY_VSC = 5,
+ RAS_STARVE_CYCLES_BY_TSE = 6,
+ RAS_SUPERTILE_CYCLES = 7,
+ RAS_TILE_CYCLES = 8,
+ RAS_FULLY_COVERED_SUPER_TILES = 9,
+ RAS_FULLY_COVERED_8X8_TILES = 10,
+ RAS_4X4_PRIM = 11,
+ RAS_8X4_4X8_PRIM = 12,
+ RAS_8X8_PRIM = 13,
+};
+
+enum a4xx_gras_tse_perfcounter_select {
+ TSE_INPUT_PRIM = 0,
+ TSE_INPUT_NULL_PRIM = 1,
+ TSE_TRIVAL_REJ_PRIM = 2,
+ TSE_CLIPPED_PRIM = 3,
+ TSE_NEW_PRIM = 4,
+ TSE_ZERO_AREA_PRIM = 5,
+ TSE_FACENESS_CULLED_PRIM = 6,
+ TSE_ZERO_PIXEL_PRIM = 7,
+ TSE_OUTPUT_NULL_PRIM = 8,
+ TSE_OUTPUT_VISIBLE_PRIM = 9,
+ TSE_PRE_CLIP_PRIM = 10,
+ TSE_POST_CLIP_PRIM = 11,
+ TSE_BUSY_CYCLES = 12,
+ TSE_PC_STARVE = 13,
+ TSE_RAS_STALL = 14,
+ TSE_STALL_BARYPLANE_FIFO_FULL = 15,
+ TSE_STALL_ZPLANE_FIFO_FULL = 16,
+};
+
+enum a4xx_hlsq_perfcounter_select {
+ HLSQ_SP_VS_STAGE_CONSTANT = 0,
+ HLSQ_SP_VS_STAGE_INSTRUCTIONS = 1,
+ HLSQ_SP_FS_STAGE_CONSTANT = 2,
+ HLSQ_SP_FS_STAGE_INSTRUCTIONS = 3,
+ HLSQ_TP_STATE = 4,
+ HLSQ_QUADS = 5,
+ HLSQ_PIXELS = 6,
+ HLSQ_VERTICES = 7,
+ HLSQ_SP_VS_STAGE_DATA_BYTES = 13,
+ HLSQ_SP_FS_STAGE_DATA_BYTES = 14,
+ HLSQ_BUSY_CYCLES = 15,
+ HLSQ_STALL_CYCLES_SP_STATE = 16,
+ HLSQ_STALL_CYCLES_SP_VS_STAGE = 17,
+ HLSQ_STALL_CYCLES_SP_FS_STAGE = 18,
+ HLSQ_STALL_CYCLES_UCHE = 19,
+ HLSQ_RBBM_LOAD_CYCLES = 20,
+ HLSQ_DI_TO_VS_START_SP = 21,
+ HLSQ_DI_TO_FS_START_SP = 22,
+ HLSQ_VS_STAGE_START_TO_DONE_SP = 23,
+ HLSQ_FS_STAGE_START_TO_DONE_SP = 24,
+ HLSQ_SP_STATE_COPY_CYCLES_VS_STAGE = 25,
+ HLSQ_SP_STATE_COPY_CYCLES_FS_STAGE = 26,
+ HLSQ_UCHE_LATENCY_CYCLES = 27,
+ HLSQ_UCHE_LATENCY_COUNT = 28,
+ HLSQ_STARVE_CYCLES_VFD = 29,
+};
+
+enum a4xx_pc_perfcounter_select {
+ PC_VIS_STREAMS_LOADED = 0,
+ PC_VPC_PRIMITIVES = 2,
+ PC_DEAD_PRIM = 3,
+ PC_LIVE_PRIM = 4,
+ PC_DEAD_DRAWCALLS = 5,
+ PC_LIVE_DRAWCALLS = 6,
+ PC_VERTEX_MISSES = 7,
+ PC_STALL_CYCLES_VFD = 9,
+ PC_STALL_CYCLES_TSE = 10,
+ PC_STALL_CYCLES_UCHE = 11,
+ PC_WORKING_CYCLES = 12,
+ PC_IA_VERTICES = 13,
+ PC_GS_PRIMITIVES = 14,
+ PC_HS_INVOCATIONS = 15,
+ PC_DS_INVOCATIONS = 16,
+ PC_DS_PRIMITIVES = 17,
+ PC_STARVE_CYCLES_FOR_INDEX = 20,
+ PC_STARVE_CYCLES_FOR_TESS_FACTOR = 21,
+ PC_STARVE_CYCLES_FOR_VIZ_STREAM = 22,
+ PC_STALL_CYCLES_TESS = 23,
+ PC_STARVE_CYCLES_FOR_POSITION = 24,
+ PC_MODE0_DRAWCALL = 25,
+ PC_MODE1_DRAWCALL = 26,
+ PC_MODE2_DRAWCALL = 27,
+ PC_MODE3_DRAWCALL = 28,
+ PC_MODE4_DRAWCALL = 29,
+ PC_PREDICATED_DEAD_DRAWCALL = 30,
+ PC_STALL_CYCLES_BY_TSE_ONLY = 31,
+ PC_STALL_CYCLES_BY_VPC_ONLY = 32,
+ PC_VPC_POS_DATA_TRANSACTION = 33,
+ PC_BUSY_CYCLES = 34,
+ PC_STARVE_CYCLES_DI = 35,
+ PC_STALL_CYCLES_VPC = 36,
+ TESS_WORKING_CYCLES = 37,
+ TESS_NUM_CYCLES_SETUP_WORKING = 38,
+ TESS_NUM_CYCLES_PTGEN_WORKING = 39,
+ TESS_NUM_CYCLES_CONNGEN_WORKING = 40,
+ TESS_BUSY_CYCLES = 41,
+ TESS_STARVE_CYCLES_PC = 42,
+ TESS_STALL_CYCLES_PC = 43,
+};
+
+enum a4xx_pwr_perfcounter_select {
+ PWR_CORE_CLOCK_CYCLES = 0,
+ PWR_BUSY_CLOCK_CYCLES = 1,
+};
+
+enum a4xx_rb_perfcounter_select {
+ RB_BUSY_CYCLES = 0,
+ RB_BUSY_CYCLES_BINNING = 1,
+ RB_BUSY_CYCLES_RENDERING = 2,
+ RB_BUSY_CYCLES_RESOLVE = 3,
+ RB_STARVE_CYCLES_BY_SP = 4,
+ RB_STARVE_CYCLES_BY_RAS = 5,
+ RB_STARVE_CYCLES_BY_MARB = 6,
+ RB_STALL_CYCLES_BY_MARB = 7,
+ RB_STALL_CYCLES_BY_HLSQ = 8,
+ RB_RB_RB_MARB_DATA = 9,
+ RB_SP_RB_QUAD = 10,
+ RB_RAS_RB_Z_QUADS = 11,
+ RB_GMEM_CH0_READ = 12,
+ RB_GMEM_CH1_READ = 13,
+ RB_GMEM_CH0_WRITE = 14,
+ RB_GMEM_CH1_WRITE = 15,
+ RB_CP_CONTEXT_DONE = 16,
+ RB_CP_CACHE_FLUSH = 17,
+ RB_CP_ZPASS_DONE = 18,
+ RB_STALL_FIFO0_FULL = 19,
+ RB_STALL_FIFO1_FULL = 20,
+ RB_STALL_FIFO2_FULL = 21,
+ RB_STALL_FIFO3_FULL = 22,
+ RB_RB_HLSQ_TRANSACTIONS = 23,
+ RB_Z_READ = 24,
+ RB_Z_WRITE = 25,
+ RB_C_READ = 26,
+ RB_C_WRITE = 27,
+ RB_C_READ_LATENCY = 28,
+ RB_Z_READ_LATENCY = 29,
+ RB_STALL_BY_UCHE = 30,
+ RB_MARB_UCHE_TRANSACTIONS = 31,
+ RB_CACHE_STALL_MISS = 32,
+ RB_CACHE_STALL_FIFO_FULL = 33,
+ RB_8BIT_BLENDER_UNITS_ACTIVE = 34,
+ RB_16BIT_BLENDER_UNITS_ACTIVE = 35,
+ RB_SAMPLER_UNITS_ACTIVE = 36,
+ RB_TOTAL_PASS = 38,
+ RB_Z_PASS = 39,
+ RB_Z_FAIL = 40,
+ RB_S_FAIL = 41,
+ RB_POWER0 = 42,
+ RB_POWER1 = 43,
+ RB_POWER2 = 44,
+ RB_POWER3 = 45,
+ RB_POWER4 = 46,
+ RB_POWER5 = 47,
+ RB_POWER6 = 48,
+ RB_POWER7 = 49,
+};
+
+enum a4xx_rbbm_perfcounter_select {
+ RBBM_ALWAYS_ON = 0,
+ RBBM_VBIF_BUSY = 1,
+ RBBM_TSE_BUSY = 2,
+ RBBM_RAS_BUSY = 3,
+ RBBM_PC_DCALL_BUSY = 4,
+ RBBM_PC_VSD_BUSY = 5,
+ RBBM_VFD_BUSY = 6,
+ RBBM_VPC_BUSY = 7,
+ RBBM_UCHE_BUSY = 8,
+ RBBM_VSC_BUSY = 9,
+ RBBM_HLSQ_BUSY = 10,
+ RBBM_ANY_RB_BUSY = 11,
+ RBBM_ANY_TPL1_BUSY = 12,
+ RBBM_ANY_SP_BUSY = 13,
+ RBBM_ANY_MARB_BUSY = 14,
+ RBBM_ANY_ARB_BUSY = 15,
+ RBBM_AHB_STATUS_BUSY = 16,
+ RBBM_AHB_STATUS_STALLED = 17,
+ RBBM_AHB_STATUS_TXFR = 18,
+ RBBM_AHB_STATUS_TXFR_SPLIT = 19,
+ RBBM_AHB_STATUS_TXFR_ERROR = 20,
+ RBBM_AHB_STATUS_LONG_STALL = 21,
+ RBBM_STATUS_MASKED = 22,
+ RBBM_CP_BUSY_GFX_CORE_IDLE = 23,
+ RBBM_TESS_BUSY = 24,
+ RBBM_COM_BUSY = 25,
+ RBBM_DCOM_BUSY = 32,
+ RBBM_ANY_CCU_BUSY = 33,
+ RBBM_DPM_BUSY = 34,
+};
+
+enum a4xx_sp_perfcounter_select {
+ SP_LM_LOAD_INSTRUCTIONS = 0,
+ SP_LM_STORE_INSTRUCTIONS = 1,
+ SP_LM_ATOMICS = 2,
+ SP_GM_LOAD_INSTRUCTIONS = 3,
+ SP_GM_STORE_INSTRUCTIONS = 4,
+ SP_GM_ATOMICS = 5,
+ SP_VS_STAGE_TEX_INSTRUCTIONS = 6,
+ SP_VS_STAGE_CFLOW_INSTRUCTIONS = 7,
+ SP_VS_STAGE_EFU_INSTRUCTIONS = 8,
+ SP_VS_STAGE_FULL_ALU_INSTRUCTIONS = 9,
+ SP_VS_STAGE_HALF_ALU_INSTRUCTIONS = 10,
+ SP_FS_STAGE_TEX_INSTRUCTIONS = 11,
+ SP_FS_STAGE_CFLOW_INSTRUCTIONS = 12,
+ SP_FS_STAGE_EFU_INSTRUCTIONS = 13,
+ SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = 14,
+ SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = 15,
+ SP_VS_INSTRUCTIONS = 17,
+ SP_FS_INSTRUCTIONS = 18,
+ SP_ADDR_LOCK_COUNT = 19,
+ SP_UCHE_READ_TRANS = 20,
+ SP_UCHE_WRITE_TRANS = 21,
+ SP_EXPORT_VPC_TRANS = 22,
+ SP_EXPORT_RB_TRANS = 23,
+ SP_PIXELS_KILLED = 24,
+ SP_ICL1_REQUESTS = 25,
+ SP_ICL1_MISSES = 26,
+ SP_ICL0_REQUESTS = 27,
+ SP_ICL0_MISSES = 28,
+ SP_ALU_WORKING_CYCLES = 29,
+ SP_EFU_WORKING_CYCLES = 30,
+ SP_STALL_CYCLES_BY_VPC = 31,
+ SP_STALL_CYCLES_BY_TP = 32,
+ SP_STALL_CYCLES_BY_UCHE = 33,
+ SP_STALL_CYCLES_BY_RB = 34,
+ SP_BUSY_CYCLES = 35,
+ SP_HS_INSTRUCTIONS = 36,
+ SP_DS_INSTRUCTIONS = 37,
+ SP_GS_INSTRUCTIONS = 38,
+ SP_CS_INSTRUCTIONS = 39,
+ SP_SCHEDULER_NON_WORKING = 40,
+ SP_WAVE_CONTEXTS = 41,
+ SP_WAVE_CONTEXT_CYCLES = 42,
+ SP_POWER0 = 43,
+ SP_POWER1 = 44,
+ SP_POWER2 = 45,
+ SP_POWER3 = 46,
+ SP_POWER4 = 47,
+ SP_POWER5 = 48,
+ SP_POWER6 = 49,
+ SP_POWER7 = 50,
+ SP_POWER8 = 51,
+ SP_POWER9 = 52,
+ SP_POWER10 = 53,
+ SP_POWER11 = 54,
+ SP_POWER12 = 55,
+ SP_POWER13 = 56,
+ SP_POWER14 = 57,
+ SP_POWER15 = 58,
+};
+
+enum a4xx_tp_perfcounter_select {
+ TP_L1_REQUESTS = 0,
+ TP_L1_MISSES = 1,
+ TP_QUADS_OFFSET = 8,
+ TP_QUAD_SHADOW = 9,
+ TP_QUADS_ARRAY = 10,
+ TP_QUADS_GRADIENT = 11,
+ TP_QUADS_1D2D = 12,
+ TP_QUADS_3DCUBE = 13,
+ TP_BUSY_CYCLES = 16,
+ TP_STALL_CYCLES_BY_ARB = 17,
+ TP_STATE_CACHE_REQUESTS = 20,
+ TP_STATE_CACHE_MISSES = 21,
+ TP_POWER0 = 22,
+ TP_POWER1 = 23,
+ TP_POWER2 = 24,
+ TP_POWER3 = 25,
+ TP_POWER4 = 26,
+ TP_POWER5 = 27,
+ TP_POWER6 = 28,
+ TP_POWER7 = 29,
+};
+
+enum a4xx_uche_perfcounter_select {
+ UCHE_VBIF_READ_BEATS_TP = 0,
+ UCHE_VBIF_READ_BEATS_VFD = 1,
+ UCHE_VBIF_READ_BEATS_HLSQ = 2,
+ UCHE_VBIF_READ_BEATS_MARB = 3,
+ UCHE_VBIF_READ_BEATS_SP = 4,
+ UCHE_READ_REQUESTS_TP = 5,
+ UCHE_READ_REQUESTS_VFD = 6,
+ UCHE_READ_REQUESTS_HLSQ = 7,
+ UCHE_READ_REQUESTS_MARB = 8,
+ UCHE_READ_REQUESTS_SP = 9,
+ UCHE_WRITE_REQUESTS_MARB = 10,
+ UCHE_WRITE_REQUESTS_SP = 11,
+ UCHE_TAG_CHECK_FAILS = 12,
+ UCHE_EVICTS = 13,
+ UCHE_FLUSHES = 14,
+ UCHE_VBIF_LATENCY_CYCLES = 15,
+ UCHE_VBIF_LATENCY_SAMPLES = 16,
+ UCHE_BUSY_CYCLES = 17,
+ UCHE_VBIF_READ_BEATS_PC = 18,
+ UCHE_READ_REQUESTS_PC = 19,
+ UCHE_WRITE_REQUESTS_VPC = 20,
+ UCHE_STALL_BY_VBIF = 21,
+ UCHE_WRITE_REQUESTS_VSC = 22,
+ UCHE_POWER0 = 23,
+ UCHE_POWER1 = 24,
+ UCHE_POWER2 = 25,
+ UCHE_POWER3 = 26,
+ UCHE_POWER4 = 27,
+ UCHE_POWER5 = 28,
+ UCHE_POWER6 = 29,
+ UCHE_POWER7 = 30,
+};
+
+enum a4xx_vbif_perfcounter_select {
+ AXI_READ_REQUESTS_ID_0 = 0,
+ AXI_READ_REQUESTS_ID_1 = 1,
+ AXI_READ_REQUESTS_ID_2 = 2,
+ AXI_READ_REQUESTS_ID_3 = 3,
+ AXI_READ_REQUESTS_ID_4 = 4,
+ AXI_READ_REQUESTS_ID_5 = 5,
+ AXI_READ_REQUESTS_ID_6 = 6,
+ AXI_READ_REQUESTS_ID_7 = 7,
+ AXI_READ_REQUESTS_ID_8 = 8,
+ AXI_READ_REQUESTS_ID_9 = 9,
+ AXI_READ_REQUESTS_ID_10 = 10,
+ AXI_READ_REQUESTS_ID_11 = 11,
+ AXI_READ_REQUESTS_ID_12 = 12,
+ AXI_READ_REQUESTS_ID_13 = 13,
+ AXI_READ_REQUESTS_ID_14 = 14,
+ AXI_READ_REQUESTS_ID_15 = 15,
+ AXI0_READ_REQUESTS_TOTAL = 16,
+ AXI1_READ_REQUESTS_TOTAL = 17,
+ AXI2_READ_REQUESTS_TOTAL = 18,
+ AXI3_READ_REQUESTS_TOTAL = 19,
+ AXI_READ_REQUESTS_TOTAL = 20,
+ AXI_WRITE_REQUESTS_ID_0 = 21,
+ AXI_WRITE_REQUESTS_ID_1 = 22,
+ AXI_WRITE_REQUESTS_ID_2 = 23,
+ AXI_WRITE_REQUESTS_ID_3 = 24,
+ AXI_WRITE_REQUESTS_ID_4 = 25,
+ AXI_WRITE_REQUESTS_ID_5 = 26,
+ AXI_WRITE_REQUESTS_ID_6 = 27,
+ AXI_WRITE_REQUESTS_ID_7 = 28,
+ AXI_WRITE_REQUESTS_ID_8 = 29,
+ AXI_WRITE_REQUESTS_ID_9 = 30,
+ AXI_WRITE_REQUESTS_ID_10 = 31,
+ AXI_WRITE_REQUESTS_ID_11 = 32,
+ AXI_WRITE_REQUESTS_ID_12 = 33,
+ AXI_WRITE_REQUESTS_ID_13 = 34,
+ AXI_WRITE_REQUESTS_ID_14 = 35,
+ AXI_WRITE_REQUESTS_ID_15 = 36,
+ AXI0_WRITE_REQUESTS_TOTAL = 37,
+ AXI1_WRITE_REQUESTS_TOTAL = 38,
+ AXI2_WRITE_REQUESTS_TOTAL = 39,
+ AXI3_WRITE_REQUESTS_TOTAL = 40,
+ AXI_WRITE_REQUESTS_TOTAL = 41,
+ AXI_TOTAL_REQUESTS = 42,
+ AXI_READ_DATA_BEATS_ID_0 = 43,
+ AXI_READ_DATA_BEATS_ID_1 = 44,
+ AXI_READ_DATA_BEATS_ID_2 = 45,
+ AXI_READ_DATA_BEATS_ID_3 = 46,
+ AXI_READ_DATA_BEATS_ID_4 = 47,
+ AXI_READ_DATA_BEATS_ID_5 = 48,
+ AXI_READ_DATA_BEATS_ID_6 = 49,
+ AXI_READ_DATA_BEATS_ID_7 = 50,
+ AXI_READ_DATA_BEATS_ID_8 = 51,
+ AXI_READ_DATA_BEATS_ID_9 = 52,
+ AXI_READ_DATA_BEATS_ID_10 = 53,
+ AXI_READ_DATA_BEATS_ID_11 = 54,
+ AXI_READ_DATA_BEATS_ID_12 = 55,
+ AXI_READ_DATA_BEATS_ID_13 = 56,
+ AXI_READ_DATA_BEATS_ID_14 = 57,
+ AXI_READ_DATA_BEATS_ID_15 = 58,
+ AXI0_READ_DATA_BEATS_TOTAL = 59,
+ AXI1_READ_DATA_BEATS_TOTAL = 60,
+ AXI2_READ_DATA_BEATS_TOTAL = 61,
+ AXI3_READ_DATA_BEATS_TOTAL = 62,
+ AXI_READ_DATA_BEATS_TOTAL = 63,
+ AXI_WRITE_DATA_BEATS_ID_0 = 64,
+ AXI_WRITE_DATA_BEATS_ID_1 = 65,
+ AXI_WRITE_DATA_BEATS_ID_2 = 66,
+ AXI_WRITE_DATA_BEATS_ID_3 = 67,
+ AXI_WRITE_DATA_BEATS_ID_4 = 68,
+ AXI_WRITE_DATA_BEATS_ID_5 = 69,
+ AXI_WRITE_DATA_BEATS_ID_6 = 70,
+ AXI_WRITE_DATA_BEATS_ID_7 = 71,
+ AXI_WRITE_DATA_BEATS_ID_8 = 72,
+ AXI_WRITE_DATA_BEATS_ID_9 = 73,
+ AXI_WRITE_DATA_BEATS_ID_10 = 74,
+ AXI_WRITE_DATA_BEATS_ID_11 = 75,
+ AXI_WRITE_DATA_BEATS_ID_12 = 76,
+ AXI_WRITE_DATA_BEATS_ID_13 = 77,
+ AXI_WRITE_DATA_BEATS_ID_14 = 78,
+ AXI_WRITE_DATA_BEATS_ID_15 = 79,
+ AXI0_WRITE_DATA_BEATS_TOTAL = 80,
+ AXI1_WRITE_DATA_BEATS_TOTAL = 81,
+ AXI2_WRITE_DATA_BEATS_TOTAL = 82,
+ AXI3_WRITE_DATA_BEATS_TOTAL = 83,
+ AXI_WRITE_DATA_BEATS_TOTAL = 84,
+ AXI_DATA_BEATS_TOTAL = 85,
+ CYCLES_HELD_OFF_ID_0 = 86,
+ CYCLES_HELD_OFF_ID_1 = 87,
+ CYCLES_HELD_OFF_ID_2 = 88,
+ CYCLES_HELD_OFF_ID_3 = 89,
+ CYCLES_HELD_OFF_ID_4 = 90,
+ CYCLES_HELD_OFF_ID_5 = 91,
+ CYCLES_HELD_OFF_ID_6 = 92,
+ CYCLES_HELD_OFF_ID_7 = 93,
+ CYCLES_HELD_OFF_ID_8 = 94,
+ CYCLES_HELD_OFF_ID_9 = 95,
+ CYCLES_HELD_OFF_ID_10 = 96,
+ CYCLES_HELD_OFF_ID_11 = 97,
+ CYCLES_HELD_OFF_ID_12 = 98,
+ CYCLES_HELD_OFF_ID_13 = 99,
+ CYCLES_HELD_OFF_ID_14 = 100,
+ CYCLES_HELD_OFF_ID_15 = 101,
+ AXI_READ_REQUEST_HELD_OFF = 102,
+ AXI_WRITE_REQUEST_HELD_OFF = 103,
+ AXI_REQUEST_HELD_OFF = 104,
+ AXI_WRITE_DATA_HELD_OFF = 105,
+ OCMEM_AXI_READ_REQUEST_HELD_OFF = 106,
+ OCMEM_AXI_WRITE_REQUEST_HELD_OFF = 107,
+ OCMEM_AXI_REQUEST_HELD_OFF = 108,
+ OCMEM_AXI_WRITE_DATA_HELD_OFF = 109,
+ ELAPSED_CYCLES_DDR = 110,
+ ELAPSED_CYCLES_OCMEM = 111,
+};
+
+enum a4xx_vfd_perfcounter_select {
+ VFD_UCHE_BYTE_FETCHED = 0,
+ VFD_UCHE_TRANS = 1,
+ VFD_FETCH_INSTRUCTIONS = 3,
+ VFD_BUSY_CYCLES = 5,
+ VFD_STALL_CYCLES_UCHE = 6,
+ VFD_STALL_CYCLES_HLSQ = 7,
+ VFD_STALL_CYCLES_VPC_BYPASS = 8,
+ VFD_STALL_CYCLES_VPC_ALLOC = 9,
+ VFD_MODE_0_FIBERS = 13,
+ VFD_MODE_1_FIBERS = 14,
+ VFD_MODE_2_FIBERS = 15,
+ VFD_MODE_3_FIBERS = 16,
+ VFD_MODE_4_FIBERS = 17,
+ VFD_BFIFO_STALL = 18,
+ VFD_NUM_VERTICES_TOTAL = 19,
+ VFD_PACKER_FULL = 20,
+ VFD_UCHE_REQUEST_FIFO_FULL = 21,
+ VFD_STARVE_CYCLES_PC = 22,
+ VFD_STARVE_CYCLES_UCHE = 23,
+};
+
+enum a4xx_vpc_perfcounter_select {
+ VPC_SP_LM_COMPONENTS = 2,
+ VPC_SP0_LM_BYTES = 3,
+ VPC_SP1_LM_BYTES = 4,
+ VPC_SP2_LM_BYTES = 5,
+ VPC_SP3_LM_BYTES = 6,
+ VPC_WORKING_CYCLES = 7,
+ VPC_STALL_CYCLES_LM = 8,
+ VPC_STARVE_CYCLES_RAS = 9,
+ VPC_STREAMOUT_CYCLES = 10,
+ VPC_UCHE_TRANSACTIONS = 12,
+ VPC_STALL_CYCLES_UCHE = 13,
+ VPC_BUSY_CYCLES = 14,
+ VPC_STARVE_CYCLES_SP = 15,
+};
+
+enum a4xx_vsc_perfcounter_select {
+ VSC_BUSY_CYCLES = 0,
+ VSC_WORKING_CYCLES = 1,
+ VSC_STALL_CYCLES_UCHE = 2,
+ VSC_STARVE_CYCLES_RAS = 3,
+ VSC_EOT_NUM = 4,
+};
+
enum a4xx_tex_filter {
A4XX_TEX_NEAREST = 0,
A4XX_TEX_LINEAR = 1,
@@ -357,6 +897,12 @@ static inline uint32_t A4XX_CGC_HLSQ_EARLY_CYC(uint32_t val)
#define REG_A4XX_RB_PERFCTR_RB_SEL_7 0x00000cce
+#define REG_A4XX_RB_PERFCTR_CCU_SEL_0 0x00000ccf
+
+#define REG_A4XX_RB_PERFCTR_CCU_SEL_1 0x00000cd0
+
+#define REG_A4XX_RB_PERFCTR_CCU_SEL_2 0x00000cd1
+
#define REG_A4XX_RB_PERFCTR_CCU_SEL_3 0x00000cd2
#define REG_A4XX_RB_FRAME_BUFFER_DIMENSION 0x00000ce0
@@ -1070,6 +1616,380 @@ static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_TP_REG(uint32_t i0) { return 0x
#define REG_A4XX_RBBM_PERFCTR_CP_0_LO 0x0000009c
+#define REG_A4XX_RBBM_PERFCTR_CP_0_HI 0x0000009d
+
+#define REG_A4XX_RBBM_PERFCTR_CP_1_LO 0x0000009e
+
+#define REG_A4XX_RBBM_PERFCTR_CP_1_HI 0x0000009f
+
+#define REG_A4XX_RBBM_PERFCTR_CP_2_LO 0x000000a0
+
+#define REG_A4XX_RBBM_PERFCTR_CP_2_HI 0x000000a1
+
+#define REG_A4XX_RBBM_PERFCTR_CP_3_LO 0x000000a2
+
+#define REG_A4XX_RBBM_PERFCTR_CP_3_HI 0x000000a3
+
+#define REG_A4XX_RBBM_PERFCTR_CP_4_LO 0x000000a4
+
+#define REG_A4XX_RBBM_PERFCTR_CP_4_HI 0x000000a5
+
+#define REG_A4XX_RBBM_PERFCTR_CP_5_LO 0x000000a6
+
+#define REG_A4XX_RBBM_PERFCTR_CP_5_HI 0x000000a7
+
+#define REG_A4XX_RBBM_PERFCTR_CP_6_LO 0x000000a8
+
+#define REG_A4XX_RBBM_PERFCTR_CP_6_HI 0x000000a9
+
+#define REG_A4XX_RBBM_PERFCTR_CP_7_LO 0x000000aa
+
+#define REG_A4XX_RBBM_PERFCTR_CP_7_HI 0x000000ab
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_0_LO 0x000000ac
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_0_HI 0x000000ad
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_1_LO 0x000000ae
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_1_HI 0x000000af
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_2_LO 0x000000b0
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_2_HI 0x000000b1
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_3_LO 0x000000b2
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_3_HI 0x000000b3
+
+#define REG_A4XX_RBBM_PERFCTR_PC_0_LO 0x000000b4
+
+#define REG_A4XX_RBBM_PERFCTR_PC_0_HI 0x000000b5
+
+#define REG_A4XX_RBBM_PERFCTR_PC_1_LO 0x000000b6
+
+#define REG_A4XX_RBBM_PERFCTR_PC_1_HI 0x000000b7
+
+#define REG_A4XX_RBBM_PERFCTR_PC_2_LO 0x000000b8
+
+#define REG_A4XX_RBBM_PERFCTR_PC_2_HI 0x000000b9
+
+#define REG_A4XX_RBBM_PERFCTR_PC_3_LO 0x000000ba
+
+#define REG_A4XX_RBBM_PERFCTR_PC_3_HI 0x000000bb
+
+#define REG_A4XX_RBBM_PERFCTR_PC_4_LO 0x000000bc
+
+#define REG_A4XX_RBBM_PERFCTR_PC_4_HI 0x000000bd
+
+#define REG_A4XX_RBBM_PERFCTR_PC_5_LO 0x000000be
+
+#define REG_A4XX_RBBM_PERFCTR_PC_5_HI 0x000000bf
+
+#define REG_A4XX_RBBM_PERFCTR_PC_6_LO 0x000000c0
+
+#define REG_A4XX_RBBM_PERFCTR_PC_6_HI 0x000000c1
+
+#define REG_A4XX_RBBM_PERFCTR_PC_7_LO 0x000000c2
+
+#define REG_A4XX_RBBM_PERFCTR_PC_7_HI 0x000000c3
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_0_LO 0x000000c4
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_0_HI 0x000000c5
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_1_LO 0x000000c6
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_1_HI 0x000000c7
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_2_LO 0x000000c8
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_2_HI 0x000000c9
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_3_LO 0x000000ca
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_3_HI 0x000000cb
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_4_LO 0x000000cc
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_4_HI 0x000000cd
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_5_LO 0x000000ce
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_5_HI 0x000000cf
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_6_LO 0x000000d0
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_6_HI 0x000000d1
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_7_LO 0x000000d2
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_7_HI 0x000000d3
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_0_LO 0x000000d4
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_0_HI 0x000000d5
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_1_LO 0x000000d6
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_1_HI 0x000000d7
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_2_LO 0x000000d8
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_2_HI 0x000000d9
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_3_LO 0x000000da
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_3_HI 0x000000db
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_4_LO 0x000000dc
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_4_HI 0x000000dd
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_5_LO 0x000000de
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_5_HI 0x000000df
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_6_LO 0x000000e0
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_6_HI 0x000000e1
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_7_LO 0x000000e2
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_7_HI 0x000000e3
+
+#define REG_A4XX_RBBM_PERFCTR_VPC_0_LO 0x000000e4
+
+#define REG_A4XX_RBBM_PERFCTR_VPC_0_HI 0x000000e5
+
+#define REG_A4XX_RBBM_PERFCTR_VPC_1_LO 0x000000e6
+
+#define REG_A4XX_RBBM_PERFCTR_VPC_1_HI 0x000000e7
+
+#define REG_A4XX_RBBM_PERFCTR_VPC_2_LO 0x000000e8
+
+#define REG_A4XX_RBBM_PERFCTR_VPC_2_HI 0x000000e9
+
+#define REG_A4XX_RBBM_PERFCTR_VPC_3_LO 0x000000ea
+
+#define REG_A4XX_RBBM_PERFCTR_VPC_3_HI 0x000000eb
+
+#define REG_A4XX_RBBM_PERFCTR_CCU_0_LO 0x000000ec
+
+#define REG_A4XX_RBBM_PERFCTR_CCU_0_HI 0x000000ed
+
+#define REG_A4XX_RBBM_PERFCTR_CCU_1_LO 0x000000ee
+
+#define REG_A4XX_RBBM_PERFCTR_CCU_1_HI 0x000000ef
+
+#define REG_A4XX_RBBM_PERFCTR_CCU_2_LO 0x000000f0
+
+#define REG_A4XX_RBBM_PERFCTR_CCU_2_HI 0x000000f1
+
+#define REG_A4XX_RBBM_PERFCTR_CCU_3_LO 0x000000f2
+
+#define REG_A4XX_RBBM_PERFCTR_CCU_3_HI 0x000000f3
+
+#define REG_A4XX_RBBM_PERFCTR_TSE_0_LO 0x000000f4
+
+#define REG_A4XX_RBBM_PERFCTR_TSE_0_HI 0x000000f5
+
+#define REG_A4XX_RBBM_PERFCTR_TSE_1_LO 0x000000f6
+
+#define REG_A4XX_RBBM_PERFCTR_TSE_1_HI 0x000000f7
+
+#define REG_A4XX_RBBM_PERFCTR_TSE_2_LO 0x000000f8
+
+#define REG_A4XX_RBBM_PERFCTR_TSE_2_HI 0x000000f9
+
+#define REG_A4XX_RBBM_PERFCTR_TSE_3_LO 0x000000fa
+
+#define REG_A4XX_RBBM_PERFCTR_TSE_3_HI 0x000000fb
+
+#define REG_A4XX_RBBM_PERFCTR_RAS_0_LO 0x000000fc
+
+#define REG_A4XX_RBBM_PERFCTR_RAS_0_HI 0x000000fd
+
+#define REG_A4XX_RBBM_PERFCTR_RAS_1_LO 0x000000fe
+
+#define REG_A4XX_RBBM_PERFCTR_RAS_1_HI 0x000000ff
+
+#define REG_A4XX_RBBM_PERFCTR_RAS_2_LO 0x00000100
+
+#define REG_A4XX_RBBM_PERFCTR_RAS_2_HI 0x00000101
+
+#define REG_A4XX_RBBM_PERFCTR_RAS_3_LO 0x00000102
+
+#define REG_A4XX_RBBM_PERFCTR_RAS_3_HI 0x00000103
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_0_LO 0x00000104
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_0_HI 0x00000105
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_1_LO 0x00000106
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_1_HI 0x00000107
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_2_LO 0x00000108
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_2_HI 0x00000109
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_3_LO 0x0000010a
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_3_HI 0x0000010b
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_4_LO 0x0000010c
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_4_HI 0x0000010d
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_5_LO 0x0000010e
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_5_HI 0x0000010f
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_6_LO 0x00000110
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_6_HI 0x00000111
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_7_LO 0x00000112
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_7_HI 0x00000113
+
+#define REG_A4XX_RBBM_PERFCTR_TP_0_LO 0x00000114
+
+#define REG_A4XX_RBBM_PERFCTR_TP_0_HI 0x00000115
+
+#define REG_A4XX_RBBM_PERFCTR_TP_0_LO 0x00000114
+
+#define REG_A4XX_RBBM_PERFCTR_TP_0_HI 0x00000115
+
+#define REG_A4XX_RBBM_PERFCTR_TP_1_LO 0x00000116
+
+#define REG_A4XX_RBBM_PERFCTR_TP_1_HI 0x00000117
+
+#define REG_A4XX_RBBM_PERFCTR_TP_2_LO 0x00000118
+
+#define REG_A4XX_RBBM_PERFCTR_TP_2_HI 0x00000119
+
+#define REG_A4XX_RBBM_PERFCTR_TP_3_LO 0x0000011a
+
+#define REG_A4XX_RBBM_PERFCTR_TP_3_HI 0x0000011b
+
+#define REG_A4XX_RBBM_PERFCTR_TP_4_LO 0x0000011c
+
+#define REG_A4XX_RBBM_PERFCTR_TP_4_HI 0x0000011d
+
+#define REG_A4XX_RBBM_PERFCTR_TP_5_LO 0x0000011e
+
+#define REG_A4XX_RBBM_PERFCTR_TP_5_HI 0x0000011f
+
+#define REG_A4XX_RBBM_PERFCTR_TP_6_LO 0x00000120
+
+#define REG_A4XX_RBBM_PERFCTR_TP_6_HI 0x00000121
+
+#define REG_A4XX_RBBM_PERFCTR_TP_7_LO 0x00000122
+
+#define REG_A4XX_RBBM_PERFCTR_TP_7_HI 0x00000123
+
+#define REG_A4XX_RBBM_PERFCTR_SP_0_LO 0x00000124
+
+#define REG_A4XX_RBBM_PERFCTR_SP_0_HI 0x00000125
+
+#define REG_A4XX_RBBM_PERFCTR_SP_1_LO 0x00000126
+
+#define REG_A4XX_RBBM_PERFCTR_SP_1_HI 0x00000127
+
+#define REG_A4XX_RBBM_PERFCTR_SP_2_LO 0x00000128
+
+#define REG_A4XX_RBBM_PERFCTR_SP_2_HI 0x00000129
+
+#define REG_A4XX_RBBM_PERFCTR_SP_3_LO 0x0000012a
+
+#define REG_A4XX_RBBM_PERFCTR_SP_3_HI 0x0000012b
+
+#define REG_A4XX_RBBM_PERFCTR_SP_4_LO 0x0000012c
+
+#define REG_A4XX_RBBM_PERFCTR_SP_4_HI 0x0000012d
+
+#define REG_A4XX_RBBM_PERFCTR_SP_5_LO 0x0000012e
+
+#define REG_A4XX_RBBM_PERFCTR_SP_5_HI 0x0000012f
+
+#define REG_A4XX_RBBM_PERFCTR_SP_6_LO 0x00000130
+
+#define REG_A4XX_RBBM_PERFCTR_SP_6_HI 0x00000131
+
+#define REG_A4XX_RBBM_PERFCTR_SP_7_LO 0x00000132
+
+#define REG_A4XX_RBBM_PERFCTR_SP_7_HI 0x00000133
+
+#define REG_A4XX_RBBM_PERFCTR_SP_8_LO 0x00000134
+
+#define REG_A4XX_RBBM_PERFCTR_SP_8_HI 0x00000135
+
+#define REG_A4XX_RBBM_PERFCTR_SP_9_LO 0x00000136
+
+#define REG_A4XX_RBBM_PERFCTR_SP_9_HI 0x00000137
+
+#define REG_A4XX_RBBM_PERFCTR_SP_10_LO 0x00000138
+
+#define REG_A4XX_RBBM_PERFCTR_SP_10_HI 0x00000139
+
+#define REG_A4XX_RBBM_PERFCTR_SP_11_LO 0x0000013a
+
+#define REG_A4XX_RBBM_PERFCTR_SP_11_HI 0x0000013b
+
+#define REG_A4XX_RBBM_PERFCTR_RB_0_LO 0x0000013c
+
+#define REG_A4XX_RBBM_PERFCTR_RB_0_HI 0x0000013d
+
+#define REG_A4XX_RBBM_PERFCTR_RB_1_LO 0x0000013e
+
+#define REG_A4XX_RBBM_PERFCTR_RB_1_HI 0x0000013f
+
+#define REG_A4XX_RBBM_PERFCTR_RB_2_LO 0x00000140
+
+#define REG_A4XX_RBBM_PERFCTR_RB_2_HI 0x00000141
+
+#define REG_A4XX_RBBM_PERFCTR_RB_3_LO 0x00000142
+
+#define REG_A4XX_RBBM_PERFCTR_RB_3_HI 0x00000143
+
+#define REG_A4XX_RBBM_PERFCTR_RB_4_LO 0x00000144
+
+#define REG_A4XX_RBBM_PERFCTR_RB_4_HI 0x00000145
+
+#define REG_A4XX_RBBM_PERFCTR_RB_5_LO 0x00000146
+
+#define REG_A4XX_RBBM_PERFCTR_RB_5_HI 0x00000147
+
+#define REG_A4XX_RBBM_PERFCTR_RB_6_LO 0x00000148
+
+#define REG_A4XX_RBBM_PERFCTR_RB_6_HI 0x00000149
+
+#define REG_A4XX_RBBM_PERFCTR_RB_7_LO 0x0000014a
+
+#define REG_A4XX_RBBM_PERFCTR_RB_7_HI 0x0000014b
+
+#define REG_A4XX_RBBM_PERFCTR_VSC_0_LO 0x0000014c
+
+#define REG_A4XX_RBBM_PERFCTR_VSC_0_HI 0x0000014d
+
+#define REG_A4XX_RBBM_PERFCTR_VSC_1_LO 0x0000014e
+
+#define REG_A4XX_RBBM_PERFCTR_VSC_1_HI 0x0000014f
+
+#define REG_A4XX_RBBM_PERFCTR_PWR_0_LO 0x00000166
+
+#define REG_A4XX_RBBM_PERFCTR_PWR_0_HI 0x00000167
+
+#define REG_A4XX_RBBM_PERFCTR_PWR_1_LO 0x00000168
+
+#define REG_A4XX_RBBM_PERFCTR_PWR_1_HI 0x00000169
+
+#define REG_A4XX_RBBM_ALWAYSON_COUNTER_LO 0x0000016e
+
+#define REG_A4XX_RBBM_ALWAYSON_COUNTER_HI 0x0000016f
+
static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_SP(uint32_t i0) { return 0x00000068 + 0x1*i0; }
static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_SP_REG(uint32_t i0) { return 0x00000068 + 0x1*i0; }
@@ -1136,6 +2056,14 @@ static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_REG(uint32_t i0)
#define REG_A4XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x00000175
+#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_0 0x00000176
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_1 0x00000177
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_2 0x00000178
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_3 0x00000179
+
#define REG_A4XX_RBBM_GPU_BUSY_MASKED 0x0000017a
#define REG_A4XX_RBBM_INT_0_STATUS 0x0000017d
@@ -1272,6 +2200,20 @@ static inline uint32_t REG_A4XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000240
#define REG_A4XX_CP_PERFCTR_CP_SEL_0 0x00000500
+#define REG_A4XX_CP_PERFCTR_CP_SEL_1 0x00000501
+
+#define REG_A4XX_CP_PERFCTR_CP_SEL_2 0x00000502
+
+#define REG_A4XX_CP_PERFCTR_CP_SEL_3 0x00000503
+
+#define REG_A4XX_CP_PERFCTR_CP_SEL_4 0x00000504
+
+#define REG_A4XX_CP_PERFCTR_CP_SEL_5 0x00000505
+
+#define REG_A4XX_CP_PERFCTR_CP_SEL_6 0x00000506
+
+#define REG_A4XX_CP_PERFCTR_CP_SEL_7 0x00000507
+
#define REG_A4XX_CP_PERFCOMBINER_SELECT 0x0000050b
static inline uint32_t REG_A4XX_CP_SCRATCH(uint32_t i0) { return 0x00000578 + 0x1*i0; }
@@ -1802,6 +2744,12 @@ static inline uint32_t A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val)
#define REG_A4XX_VPC_DEBUG_ECO_CONTROL 0x00000e64
+#define REG_A4XX_VPC_PERFCTR_VPC_SEL_0 0x00000e65
+
+#define REG_A4XX_VPC_PERFCTR_VPC_SEL_1 0x00000e66
+
+#define REG_A4XX_VPC_PERFCTR_VPC_SEL_2 0x00000e67
+
#define REG_A4XX_VPC_PERFCTR_VPC_SEL_3 0x00000e68
#define REG_A4XX_VPC_ATTR 0x00002140
@@ -1914,6 +2862,20 @@ static inline uint32_t REG_A4XX_VSC_PIPE_DATA_LENGTH_REG(uint32_t i0) { return 0
#define REG_A4XX_VFD_DEBUG_CONTROL 0x00000e40
+#define REG_A4XX_VFD_PERFCTR_VFD_SEL_0 0x00000e43
+
+#define REG_A4XX_VFD_PERFCTR_VFD_SEL_1 0x00000e44
+
+#define REG_A4XX_VFD_PERFCTR_VFD_SEL_2 0x00000e45
+
+#define REG_A4XX_VFD_PERFCTR_VFD_SEL_3 0x00000e46
+
+#define REG_A4XX_VFD_PERFCTR_VFD_SEL_4 0x00000e47
+
+#define REG_A4XX_VFD_PERFCTR_VFD_SEL_5 0x00000e48
+
+#define REG_A4XX_VFD_PERFCTR_VFD_SEL_6 0x00000e49
+
#define REG_A4XX_VFD_PERFCTR_VFD_SEL_7 0x00000e4a
#define REG_A4XX_VGT_CL_INITIATOR 0x000021d0
@@ -2070,6 +3032,20 @@ static inline uint32_t A4XX_VFD_DECODE_INSTR_SHIFTCNT(uint32_t val)
#define REG_A4XX_TPL1_TP_MODE_CONTROL 0x00000f03
+#define REG_A4XX_TPL1_PERFCTR_TP_SEL_0 0x00000f04
+
+#define REG_A4XX_TPL1_PERFCTR_TP_SEL_1 0x00000f05
+
+#define REG_A4XX_TPL1_PERFCTR_TP_SEL_2 0x00000f06
+
+#define REG_A4XX_TPL1_PERFCTR_TP_SEL_3 0x00000f07
+
+#define REG_A4XX_TPL1_PERFCTR_TP_SEL_4 0x00000f08
+
+#define REG_A4XX_TPL1_PERFCTR_TP_SEL_5 0x00000f09
+
+#define REG_A4XX_TPL1_PERFCTR_TP_SEL_6 0x00000f0a
+
#define REG_A4XX_TPL1_PERFCTR_TP_SEL_7 0x00000f0b
#define REG_A4XX_TPL1_TP_TEX_OFFSET 0x00002380
@@ -2124,8 +3100,20 @@ static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_GS(uint32_t val)
#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_0 0x00000c88
+#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_1 0x00000c89
+
+#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_2 0x00000c8a
+
#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_3 0x00000c8b
+#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_0 0x00000c8c
+
+#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_1 0x00000c8d
+
+#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_2 0x00000c8e
+
+#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_3 0x00000c8f
+
#define REG_A4XX_GRAS_CL_CLIP_CNTL 0x00002000
#define A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00008000
#define A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z 0x00400000
@@ -2391,6 +3379,20 @@ static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y(uint32_t val)
#define REG_A4XX_UCHE_CACHE_WAYS_VFD 0x00000e8c
+#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_0 0x00000e8e
+
+#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_1 0x00000e8f
+
+#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_2 0x00000e90
+
+#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_3 0x00000e91
+
+#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_4 0x00000e92
+
+#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_5 0x00000e93
+
+#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_6 0x00000e94
+
#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_7 0x00000e95
#define REG_A4XX_HLSQ_TIMEOUT_THRESHOLD 0x00000e00
@@ -2401,6 +3403,22 @@ static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y(uint32_t val)
#define REG_A4XX_HLSQ_PERF_PIPE_MASK 0x00000e0e
+#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_0 0x00000e06
+
+#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_1 0x00000e07
+
+#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_2 0x00000e08
+
+#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_3 0x00000e09
+
+#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_4 0x00000e0a
+
+#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_5 0x00000e0b
+
+#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_6 0x00000e0c
+
+#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_7 0x00000e0d
+
#define REG_A4XX_HLSQ_CONTROL_0_REG 0x000023c0
#define A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000010
#define A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT 4
@@ -2655,6 +3673,18 @@ static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH(uint32_t val)
#define REG_A4XX_PC_PERFCTR_PC_SEL_0 0x00000d10
+#define REG_A4XX_PC_PERFCTR_PC_SEL_1 0x00000d11
+
+#define REG_A4XX_PC_PERFCTR_PC_SEL_2 0x00000d12
+
+#define REG_A4XX_PC_PERFCTR_PC_SEL_3 0x00000d13
+
+#define REG_A4XX_PC_PERFCTR_PC_SEL_4 0x00000d14
+
+#define REG_A4XX_PC_PERFCTR_PC_SEL_5 0x00000d15
+
+#define REG_A4XX_PC_PERFCTR_PC_SEL_6 0x00000d16
+
#define REG_A4XX_PC_PERFCTR_PC_SEL_7 0x00000d17
#define REG_A4XX_PC_BIN_BASE 0x000021c0
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_context.h b/src/gallium/drivers/freedreno/a4xx/fd4_context.h
index 074c5a752bf..0c1027d5804 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_context.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.h
@@ -49,6 +49,8 @@ struct fd4_context {
/* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We
* could combine it with another allocation.
+ *
+ * (the upper area is used as a scratch bo; see fd4_query.c)
*/
struct fd_bo *vsc_size_mem;
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index 4a3f1da30ed..72154bf286a 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -33,6 +33,7 @@
#include "util/u_format.h"
#include "freedreno_resource.h"
+#include "freedreno_query_hw.h"
#include "fd4_emit.h"
#include "fd4_blend.h"
@@ -882,6 +883,8 @@ fd4_emit_restore(struct fd_context *ctx)
OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
OUT_RING(ring, 0x0);
+ fd_hw_query_enable(ctx, ring);
+
ctx->needs_rb_fbd = true;
}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_query.c b/src/gallium/drivers/freedreno/a4xx/fd4_query.c
index 4f69e0c1694..14a809431ac 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_query.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_query.c
@@ -31,6 +31,7 @@
#include "freedreno_util.h"
#include "fd4_query.h"
+#include "fd4_context.h"
#include "fd4_draw.h"
#include "fd4_format.h"
@@ -81,7 +82,12 @@ static uint64_t
count_samples(const struct fd_rb_samp_ctrs *start,
const struct fd_rb_samp_ctrs *end)
{
- return end->ctr[0] - start->ctr[0];
+ uint64_t n = 0;
+
+ for (unsigned i = 0; i < 16; i += 4)
+ n += end->ctr[i] - start->ctr[i];
+
+ return n / 2;
}
static void
@@ -102,6 +108,127 @@ occlusion_predicate_accumulate_result(struct fd_context *ctx,
result->b |= (n > 0);
}
+/*
+ * Time Elapsed Query:
+ *
+ * Note: we could in theory support timestamp queries, but they
+ * won't give sensible results for tilers.
+ */
+
+static void
+time_elapsed_enable(struct fd_context *ctx, struct fd_ringbuffer *ring)
+{
+ /* Right now, the assignment of countable to counter register is
+ * just hard coded. If we start exposing more countables than we
+ * have counters, we will need to be more clever.
+ */
+ fd_wfi(ctx, ring);
+ OUT_PKT0(ring, REG_A4XX_CP_PERFCTR_CP_SEL_0, 1);
+ OUT_RING(ring, CP_ALWAYS_COUNT);
+}
+
+static struct fd_hw_sample *
+time_elapsed_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
+{
+ struct fd_hw_sample *samp = fd_hw_sample_init(ctx, sizeof(uint64_t));
+
+ /* use unused part of vsc_size_mem as scratch space, to avoid
+ * extra allocation:
+ */
+ struct fd_bo *scratch_bo = fd4_context(ctx)->vsc_size_mem;
+ const int sample_off = 128;
+ const int addr_off = sample_off + 8;
+
+ debug_assert(ctx->screen->max_freq > 0);
+
+ /* Basic issue is that we need to read counter value to a relative
+ * destination (with per-tile offset) rather than absolute dest
+ * addr. But there is no pm4 packet that can do that. This is
+ * where it would be *really* nice if we could write our own fw
+ * since afaict implementing the sort of packet we need would be
+ * trivial.
+ *
+ * Instead, we:
+ * (1) CP_REG_TO_MEM to do a 64b copy of counter to scratch buffer
+ * (2) CP_MEM_WRITE to write per-sample offset to scratch buffer
+ * (3) CP_REG_TO_MEM w/ accumulate flag to add the per-tile base
+ * address to the per-sample offset in the scratch buffer
+ * (4) CP_MEM_TO_REG to copy resulting address from steps #2 and #3
+ * to CP_ME_NRT_ADDR
+ * (5) CP_MEM_TO_REG's to copy saved counter value from scratch
+ * buffer to CP_ME_NRT_DATA to trigger the write out to query
+ * result buffer
+ *
+ * Straightforward, right?
+ *
+ * Maybe could swap the order of things in the scratch buffer to
+ * put address first, and copy back to CP_ME_NRT_ADDR+DATA in one
+ * shot, but that's really just polishing a turd..
+ */
+
+ fd_wfi(ctx, ring);
+
+ /* copy sample counter _LO and _HI to scratch: */
+ OUT_PKT3(ring, CP_REG_TO_MEM, 2);
+ OUT_RING(ring, CP_REG_TO_MEM_0_REG(REG_A4XX_RBBM_PERFCTR_CP_0_LO) |
+ CP_REG_TO_MEM_0_64B |
+ CP_REG_TO_MEM_0_CNT(2-1)); /* write 2 regs to mem */
+ OUT_RELOC(ring, scratch_bo, sample_off, 0, 0);
+
+ /* ok... here we really *would* like to use the CP_SET_CONSTANT
+ * mode which can add a constant to value in reg2 and write to
+ * reg1... *but* that only works for banked/context registers,
+ * and CP_ME_NRT_DATA isn't one of those.. so we need to do some
+ * CP math to the scratch buffer instead:
+ *
+ * (note the first 8 bytes at sample_off hold the counter value; the
+ * address calculation uses addr_off, ie. sample_off + 0x8)
+ */
+
+ /* per-sample offset to scratch bo: */
+ OUT_PKT3(ring, CP_MEM_WRITE, 2);
+ OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
+ OUT_RING(ring, samp->offset);
+
+ /* now add to that the per-tile base: */
+ OUT_PKT3(ring, CP_REG_TO_MEM, 2);
+ OUT_RING(ring, CP_REG_TO_MEM_0_REG(HW_QUERY_BASE_REG) |
+ CP_REG_TO_MEM_0_ACCUMULATE |
+ CP_REG_TO_MEM_0_CNT(1-1)); /* readback 1 regs */
+ OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
+
+ /* now copy that back to CP_ME_NRT_ADDR: */
+ OUT_PKT3(ring, CP_MEM_TO_REG, 2);
+ OUT_RING(ring, REG_A4XX_CP_ME_NRT_ADDR);
+ OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
+
+ /* and finally, copy sample from scratch buffer to CP_ME_NRT_DATA
+ * to trigger the write to result buffer
+ */
+ OUT_PKT3(ring, CP_MEM_TO_REG, 2);
+ OUT_RING(ring, REG_A4XX_CP_ME_NRT_DATA);
+ OUT_RELOC(ring, scratch_bo, sample_off, 0, 0);
+
+ /* and again to get the value of the _HI reg from scratch: */
+ OUT_PKT3(ring, CP_MEM_TO_REG, 2);
+ OUT_RING(ring, REG_A4XX_CP_ME_NRT_DATA);
+ OUT_RELOC(ring, scratch_bo, sample_off + 0x4, 0, 0);
+
+ /* Sigh.. */
+
+ return samp;
+}
+
+static void
+time_elapsed_accumulate_result(struct fd_context *ctx,
+ const void *start, const void *end,
+ union pipe_query_result *result)
+{
+ uint64_t n = *(uint64_t *)end - *(uint64_t *)start;
+ /* max_freq is in Hz, convert cycle count to ns: */
+ result->u64 += n * 1000000000 / ctx->screen->max_freq;
+}
+
static const struct fd_hw_sample_provider occlusion_counter = {
.query_type = PIPE_QUERY_OCCLUSION_COUNTER,
.active = FD_STAGE_DRAW,
@@ -116,8 +243,17 @@ static const struct fd_hw_sample_provider occlusion_predicate = {
.accumulate_result = occlusion_predicate_accumulate_result,
};
+static const struct fd_hw_sample_provider time_elapsed = {
+ .query_type = PIPE_QUERY_TIME_ELAPSED,
+ .active = FD_STAGE_DRAW,
+ .enable = time_elapsed_enable,
+ .get_sample = time_elapsed_get_sample,
+ .accumulate_result = time_elapsed_accumulate_result,
+};
+
void fd4_query_context_init(struct pipe_context *pctx)
{
fd_hw_query_register_provider(pctx, &occlusion_counter);
fd_hw_query_register_provider(pctx, &occlusion_predicate);
+ fd_hw_query_register_provider(pctx, &time_elapsed);
}
diff --git a/src/gallium/drivers/freedreno/adreno_common.xml.h b/src/gallium/drivers/freedreno/adreno_common.xml.h
index f9c0e6aaa83..ac5343f1a78 100644
--- a/src/gallium/drivers/freedreno/adreno_common.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_common.xml.h
@@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
-Copyright (C) 2013-2015 by the following authors:
+Copyright (C) 2013-2016 by the following authors:
- Rob Clark <[email protected]> (robclark)
+- Ilia Mirkin <[email protected]> (imirkin)
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
diff --git a/src/gallium/drivers/freedreno/adreno_pm4.xml.h b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
index c6741890c69..09b26a253f0 100644
--- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
@@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
-Copyright (C) 2013-2015 by the following authors:
+Copyright (C) 2013-2016 by the following authors:
- Rob Clark <[email protected]> (robclark)
+- Ilia Mirkin <[email protected]> (imirkin)
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@@ -172,6 +173,11 @@ enum adreno_pm4_type3_packets {
CP_UNKNOWN_1A = 26,
CP_UNKNOWN_4E = 78,
CP_WIDE_REG_WRITE = 116,
+ CP_SCRATCH_TO_REG = 77,
+ CP_REG_TO_SCRATCH = 74,
+ CP_WAIT_MEM_WRITES = 18,
+ CP_COND_REG_EXEC = 71,
+ CP_MEM_TO_REG = 66,
IN_IB_PREFETCH_END = 23,
IN_SUBBLK_PREFETCH = 31,
IN_INSTR_PREFETCH = 32,
@@ -503,5 +509,29 @@ static inline uint32_t CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS(uint32_t val)
return ((val) << CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__SHIFT) & CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__MASK;
}
+#define REG_CP_REG_TO_MEM_0 0x00000000
+#define CP_REG_TO_MEM_0_REG__MASK 0x0000ffff
+#define CP_REG_TO_MEM_0_REG__SHIFT 0
+static inline uint32_t CP_REG_TO_MEM_0_REG(uint32_t val)
+{
+ return ((val) << CP_REG_TO_MEM_0_REG__SHIFT) & CP_REG_TO_MEM_0_REG__MASK;
+}
+#define CP_REG_TO_MEM_0_CNT__MASK 0x3ff80000
+#define CP_REG_TO_MEM_0_CNT__SHIFT 19
+static inline uint32_t CP_REG_TO_MEM_0_CNT(uint32_t val)
+{
+ return ((val) << CP_REG_TO_MEM_0_CNT__SHIFT) & CP_REG_TO_MEM_0_CNT__MASK;
+}
+#define CP_REG_TO_MEM_0_64B 0x40000000
+#define CP_REG_TO_MEM_0_ACCUMULATE 0x80000000
+
+#define REG_CP_REG_TO_MEM_1 0x00000001
+#define CP_REG_TO_MEM_1_DEST__MASK 0xffffffff
+#define CP_REG_TO_MEM_1_DEST__SHIFT 0
+static inline uint32_t CP_REG_TO_MEM_1_DEST(uint32_t val)
+{
+ return ((val) << CP_REG_TO_MEM_1_DEST__SHIFT) & CP_REG_TO_MEM_1_DEST__MASK;
+}
+
#endif /* ADRENO_PM4_XML */
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index 9e7130ab915..85ce97c16b7 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -164,6 +164,9 @@ struct fd_context {
*/
struct fd_hw_sample *sample_cache[MAX_HW_SAMPLE_PROVIDERS];
+ /* which sample providers were active in the current batch: */
+ uint32_t active_providers;
+
/* tracking for current stage, to know when to start/stop
* any active queries:
*/
diff --git a/src/gallium/drivers/freedreno/freedreno_query.h b/src/gallium/drivers/freedreno/freedreno_query.h
index c2c71da2790..1e4f45ffcd3 100644
--- a/src/gallium/drivers/freedreno/freedreno_query.h
+++ b/src/gallium/drivers/freedreno/freedreno_query.h
@@ -65,4 +65,16 @@ fd_query(struct pipe_query *pq)
void fd_query_screen_init(struct pipe_screen *pscreen);
void fd_query_context_init(struct pipe_context *pctx);
+static inline bool
+skip_begin_query(int type)
+{
+ switch (type) {
+ case PIPE_QUERY_TIMESTAMP:
+ case PIPE_QUERY_GPU_FINISHED:
+ return true;
+ default:
+ return false;
+ }
+}
+
#endif /* FREEDRENO_QUERY_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_query_hw.c b/src/gallium/drivers/freedreno/freedreno_query_hw.c
index 027fdc9de23..2ac03f22b41 100644
--- a/src/gallium/drivers/freedreno/freedreno_query_hw.c
+++ b/src/gallium/drivers/freedreno/freedreno_query_hw.c
@@ -47,6 +47,8 @@ static int pidx(unsigned query_type)
return 0;
case PIPE_QUERY_OCCLUSION_PREDICATE:
return 1;
+ case PIPE_QUERY_TIME_ELAPSED:
+ return 2;
default:
return -1;
}
@@ -89,7 +91,9 @@ static void
resume_query(struct fd_context *ctx, struct fd_hw_query *hq,
struct fd_ringbuffer *ring)
{
+ int idx = pidx(hq->provider->query_type);
assert(!hq->period);
+ ctx->active_providers |= (1 << idx);
hq->period = util_slab_alloc(&ctx->sample_period_pool);
list_inithead(&hq->period->list);
hq->period->start = get_sample(ctx, ring, hq->base.type);
@@ -101,7 +105,9 @@ static void
pause_query(struct fd_context *ctx, struct fd_hw_query *hq,
struct fd_ringbuffer *ring)
{
+ int idx = pidx(hq->provider->query_type);
assert(hq->period && !hq->period->end);
+ assert(ctx->active_providers & (1 << idx));
hq->period->end = get_sample(ctx, ring, hq->base.type);
list_addtail(&hq->period->list, &hq->current_periods);
hq->period = NULL;
@@ -156,6 +162,12 @@ static void
fd_hw_end_query(struct fd_context *ctx, struct fd_query *q)
{
struct fd_hw_query *hq = fd_hw_query(q);
+ /* there are a couple of special cases (see skip_begin_query())
+ * which don't have a matching ->begin_query(), so begin them here:
+ */
+ if (skip_begin_query(q->type) && !q->active) {
+ fd_hw_begin_query(ctx, q);
+ }
if (!q->active)
return;
if (is_active(hq, ctx->stage))
@@ -291,6 +303,8 @@ fd_hw_sample_init(struct fd_context *ctx, uint32_t size)
struct fd_hw_sample *samp = util_slab_alloc(&ctx->sample_pool);
pipe_reference_init(&samp->reference, 1);
samp->size = size;
+ debug_assert(util_is_power_of_two(size));
+ ctx->next_sample_offset = align(ctx->next_sample_offset, size);
samp->offset = ctx->next_sample_offset;
/* NOTE: util_slab_alloc() does not zero out the buffer: */
samp->bo = NULL;
@@ -318,7 +332,7 @@ prepare_sample(struct fd_hw_sample *samp, struct fd_bo *bo,
assert(samp->tile_stride == tile_stride);
return;
}
- samp->bo = bo;
+ samp->bo = fd_bo_ref(bo);
samp->num_tiles = num_tiles;
samp->tile_stride = tile_stride;
}
@@ -431,6 +445,23 @@ fd_hw_query_set_stage(struct fd_context *ctx, struct fd_ringbuffer *ring,
ctx->stage = stage;
}
+/* call the provider->enable() for all the hw queries that were active
+ * in the current batch. This sets up perfctr selector regs statically
+ * for the duration of the batch.
+ */
+void
+fd_hw_query_enable(struct fd_context *ctx, struct fd_ringbuffer *ring)
+{
+ for (int idx = 0; idx < MAX_HW_SAMPLE_PROVIDERS; idx++) {
+ if (ctx->active_providers & (1 << idx)) {
+ assert(ctx->sample_providers[idx]);
+ if (ctx->sample_providers[idx]->enable)
+ ctx->sample_providers[idx]->enable(ctx, ring);
+ }
+ }
+ ctx->active_providers = 0; /* clear it for next frame */
+}
+
void
fd_hw_query_register_provider(struct pipe_context *pctx,
const struct fd_hw_sample_provider *provider)
diff --git a/src/gallium/drivers/freedreno/freedreno_query_hw.h b/src/gallium/drivers/freedreno/freedreno_query_hw.h
index 8f4b1f58ee5..8a5d114d806 100644
--- a/src/gallium/drivers/freedreno/freedreno_query_hw.h
+++ b/src/gallium/drivers/freedreno/freedreno_query_hw.h
@@ -76,6 +76,11 @@ struct fd_hw_sample_provider {
/* stages applicable to the query type: */
enum fd_render_stage active;
+ /* Optional hook for enabling a counter. Guaranteed to happen
+ * at least once before the first ->get_sample() in a batch.
+ */
+ void (*enable)(struct fd_context *ctx, struct fd_ringbuffer *ring);
+
/* when a new sample is required, emit appropriate cmdstream
* and return a sample object:
*/
@@ -144,6 +149,7 @@ void fd_hw_query_prepare_tile(struct fd_context *ctx, uint32_t n,
struct fd_ringbuffer *ring);
void fd_hw_query_set_stage(struct fd_context *ctx,
struct fd_ringbuffer *ring, enum fd_render_stage stage);
+void fd_hw_query_enable(struct fd_context *ctx, struct fd_ringbuffer *ring);
void fd_hw_query_register_provider(struct pipe_context *pctx,
const struct fd_hw_sample_provider *provider);
void fd_hw_query_init(struct pipe_context *pctx);
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 27f4d267438..2b3ecfe664e 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -298,12 +298,14 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return is_a3xx(screen) ? 1 : 0;
/* Queries. */
- case PIPE_CAP_QUERY_TIME_ELAPSED:
case PIPE_CAP_QUERY_TIMESTAMP:
case PIPE_CAP_QUERY_BUFFER_OBJECT:
return 0;
case PIPE_CAP_OCCLUSION_QUERY:
return is_a3xx(screen) || is_a4xx(screen);
+ case PIPE_CAP_QUERY_TIME_ELAPSED:
+ /* only a4xx, requires new enough kernel so we know max_freq: */
+ return (screen->max_freq > 0) && is_a4xx(screen);
case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
case PIPE_CAP_MIN_TEXEL_OFFSET:
@@ -434,9 +436,12 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
return 16;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
}
debug_printf("unknown shader param %d\n", param);
@@ -534,6 +539,16 @@ fd_screen_create(struct fd_device *dev)
}
screen->device_id = val;
+ if (fd_pipe_get_param(screen->pipe, FD_MAX_FREQ, &val)) {
+ DBG("could not get gpu freq");
+ /* this limits what performance related queries are
+ * supported but is not fatal
+ */
+ screen->max_freq = 0;
+ } else {
+ screen->max_freq = val;
+ }
+
if (fd_pipe_get_param(screen->pipe, FD_GPU_ID, &val)) {
DBG("could not get gpu-id");
goto fail;
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h b/src/gallium/drivers/freedreno/freedreno_screen.h
index 8fb096a10dd..a81c7786390 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.h
+++ b/src/gallium/drivers/freedreno/freedreno_screen.h
@@ -56,6 +56,7 @@ struct fd_screen {
uint32_t device_id;
uint32_t gpu_id; /* 220, 305, etc */
uint32_t chip_id; /* coreid:8 majorrev:8 minorrev:8 patch:8 */
+ uint32_t max_freq;
uint32_t max_rts; /* max # of render targets */
void *compiler; /* currently unused for a2xx */
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index ffa75775505..7a1812f2518 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1365,7 +1365,6 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
struct ir3_block *b = ctx->block;
struct ir3_instruction **dst, *sam, *src0[12], *src1[4];
struct ir3_instruction **coord, *lod, *compare, *proj, **off, **ddx, **ddy;
- struct ir3_instruction *const_off[4];
bool has_bias = false, has_lod = false, has_proj = false, has_off = false;
unsigned i, coords, flags;
unsigned nsrc0 = 0, nsrc1 = 0;
diff --git a/src/gallium/drivers/ilo/ilo_gpgpu.c b/src/gallium/drivers/ilo/ilo_gpgpu.c
index b7415901a88..ab165b6d43b 100644
--- a/src/gallium/drivers/ilo/ilo_gpgpu.c
+++ b/src/gallium/drivers/ilo/ilo_gpgpu.c
@@ -79,9 +79,7 @@ launch_grid(struct ilo_context *ilo,
}
static void
-ilo_launch_grid(struct pipe_context *pipe,
- const uint *block_layout, const uint *grid_layout,
- uint32_t pc, const void *input)
+ilo_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
{
struct ilo_context *ilo = ilo_context(pipe);
struct ilo_shader_state *cs = ilo->state_vector.cs;
@@ -92,13 +90,13 @@ ilo_launch_grid(struct pipe_context *pipe,
input_buf.buffer_size =
ilo_shader_get_kernel_param(cs, ILO_KERNEL_CS_INPUT_SIZE);
if (input_buf.buffer_size) {
- u_upload_data(ilo->uploader, 0, input_buf.buffer_size, 16, input,
+ u_upload_data(ilo->uploader, 0, input_buf.buffer_size, 16, info->input,
&input_buf.buffer_offset, &input_buf.buffer);
}
ilo_shader_cache_upload(ilo->shader_cache, &ilo->cp->builder);
- launch_grid(ilo, block_layout, grid_layout, &input_buf, pc);
+ launch_grid(ilo, info->block, info->grid, &input_buf, info->pc);
ilo_render_invalidate_hw(ilo->render);
diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c
index 44d7c11af43..ef9da6b8315 100644
--- a/src/gallium/drivers/ilo/ilo_screen.c
+++ b/src/gallium/drivers/ilo/ilo_screen.c
@@ -136,6 +136,8 @@ ilo_get_shader_param(struct pipe_screen *screen, unsigned shader,
return ILO_MAX_SAMPLER_VIEWS;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ return 0;
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
return 1;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c
index 8dc2d38e039..f8d2637cc6f 100644
--- a/src/gallium/drivers/ilo/ilo_state.c
+++ b/src/gallium/drivers/ilo/ilo_state.c
@@ -1851,7 +1851,7 @@ ilo_set_sampler_views(struct pipe_context *pipe, unsigned shader,
static void
ilo_set_shader_images(struct pipe_context *pipe, unsigned shader,
unsigned start, unsigned count,
- struct pipe_image_view **views)
+ struct pipe_image_view *views)
{
#if 0
struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index d22e50777fa..9e56c962d2d 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -910,7 +910,9 @@ lp_rast_create( unsigned num_threads )
create_rast_threads(rast);
/* for synchronizing rasterization threads */
- pipe_barrier_init( &rast->barrier, rast->num_threads );
+ if (rast->num_threads > 0) {
+ pipe_barrier_init( &rast->barrier, rast->num_threads );
+ }
memset(lp_dummy_tile, 0, sizeof lp_dummy_tile);
@@ -967,7 +969,9 @@ void lp_rast_destroy( struct lp_rasterizer *rast )
}
/* for synchronizing rasterization threads */
- pipe_barrier_destroy( &rast->barrier );
+ if (rast->num_threads > 0) {
+ pipe_barrier_destroy( &rast->barrier );
+ }
lp_scene_queue_destroy(rast->full_scenes);
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h
index 5ab297d7e1a..97146912704 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h
@@ -169,8 +169,8 @@ struct lp_setup_context
};
static inline void
-scissor_planes_needed(boolean scis_planes[4], struct u_rect *bbox,
- struct u_rect *scissor)
+scissor_planes_needed(boolean scis_planes[4], const struct u_rect *bbox,
+ const struct u_rect *scissor)
{
/* left */
scis_planes[0] = (bbox->x0 < scissor->x0);
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c
index af4e7900d3c..018130c3192 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_line.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c
@@ -719,7 +719,7 @@ try_setup_line( struct lp_setup_context *setup,
*/
if (nr_planes > 4) {
/* why not just use draw_regions */
- struct u_rect *scissor = &setup->scissors[viewport_index];
+ const struct u_rect *scissor = &setup->scissors[viewport_index];
struct lp_rast_plane *plane_s = &plane[4];
boolean s_planes[4];
scissor_planes_needed(s_planes, &bbox, scissor);
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index cdb3d015dec..29aee726941 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -681,7 +681,7 @@ do_triangle_ccw(struct lp_setup_context *setup,
*/
if (nr_planes > 3) {
/* why not just use draw_regions */
- struct u_rect *scissor = &setup->scissors[viewport_index];
+ const struct u_rect *scissor = &setup->scissors[viewport_index];
struct lp_rast_plane *plane_s = &plane[3];
boolean s_planes[4];
scissor_planes_needed(s_planes, &bbox, scissor);
diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources
index 31a93659647..43ffce63a25 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -60,6 +60,8 @@ NV30_C_SOURCES := \
nv30/nvfx_vertprog.c
NV50_C_SOURCES := \
+ nv50/g80_defs.xml.h \
+ nv50/g80_texture.xml.h \
nv50/nv50_2d.xml.h \
nv50/nv50_3ddefs.xml.h \
nv50/nv50_3d.xml.h \
@@ -68,7 +70,6 @@ NV50_C_SOURCES := \
nv50/nv50_compute.xml.h \
nv50/nv50_context.c \
nv50/nv50_context.h \
- nv50/nv50_defs.xml.h \
nv50/nv50_formats.c \
nv50/nv50_miptree.c \
nv50/nv50_program.c \
@@ -93,7 +94,6 @@ NV50_C_SOURCES := \
nv50/nv50_state_validate.c \
nv50/nv50_surface.c \
nv50/nv50_tex.c \
- nv50/nv50_texture.xml.h \
nv50/nv50_transfer.c \
nv50/nv50_transfer.h \
nv50/nv50_vbo.c \
@@ -147,6 +147,7 @@ NVC0_CODEGEN_SOURCES := \
codegen/nv50_ir_target_nvc0.h
NVC0_C_SOURCES := \
+ nvc0/gm107_texture.xml.h \
nvc0/nvc0_3d.xml.h \
nvc0/nvc0_compute.c \
nvc0/nvc0_compute.h \
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index 9d7becf27d4..97ebed455b6 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -232,6 +232,8 @@ enum operation
#define NV50_IR_SUBOP_SHFL_UP 1
#define NV50_IR_SUBOP_SHFL_DOWN 2
#define NV50_IR_SUBOP_SHFL_BFLY 3
+#define NV50_IR_SUBOP_LOAD_LOCKED 1
+#define NV50_IR_SUBOP_STORE_UNLOCKED 2
#define NV50_IR_SUBOP_MADSP_SD 0xffff
// Yes, we could represent those with DataType.
// Or put the type into operation and have a couple 1000 values in that enum.
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index 0c7cd1d8137..a78b3f954a4 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -433,6 +433,10 @@ CodeEmitterGK110::emitForm_21(const Instruction *i, uint32_t opc2,
srcId(i->src(s), s ? ((s == 2) ? 42 : s1) : 10);
break;
default:
+ if (i->op == OP_SELP) {
+ assert(s == 2 && i->src(s).getFile() == FILE_PREDICATE);
+ srcId(i->src(s), 42);
+ }
// ignore here, can be predicate or flags, but must not be address
break;
}
@@ -1045,7 +1049,7 @@ void CodeEmitterGK110::emitSELP(const Instruction *i)
{
emitForm_21(i, 0x250, 0x050);
- if ((i->cc == CC_NOT_P) ^ (bool)(i->src(2).mod & Modifier(NV50_IR_MOD_NOT)))
+ if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
code[1] |= 1 << 13;
}
@@ -1239,7 +1243,7 @@ CodeEmitterGK110::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask
defId(i->def(0), 2);
srcId(i->src(0), 10);
- srcId(i->srcExists(1) ? i->src(1) : i->src(0), 23);
+ srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 23);
if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
code[1] |= 1 << 9; // dall
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index dee26225b7e..93c40d15e46 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -193,6 +193,8 @@ private:
void emitNOP();
void emitKIL();
void emitOUT();
+
+ void emitMEMBAR();
};
/*******************************************************************************
@@ -248,6 +250,8 @@ CodeEmitterGM107::emitSYS(int pos, const Value *val)
case SV_INVOCATION_ID : id = 0x11; break;
case SV_THREAD_KILL : id = 0x13; break;
case SV_INVOCATION_INFO: id = 0x1d; break;
+ case SV_TID : id = 0x21 + val->reg.data.sv.index; break;
+ case SV_CTAID : id = 0x25 + val->reg.data.sv.index; break;
default:
assert(!"invalid system value");
id = 0;
@@ -1531,7 +1535,10 @@ CodeEmitterGM107::emitFSWZADD()
emitRND (0x27);
emitField(0x26, 1, insn->lanes); /* abused for .ndv */
emitField(0x1c, 8, insn->subOp);
- emitGPR (0x14, insn->src(1));
+ if (insn->predSrc != 1)
+ emitGPR (0x14, insn->src(1));
+ else
+ emitGPR (0x14);
emitGPR (0x08, insn->src(0));
emitGPR (0x00, insn->def(0));
}
@@ -2327,22 +2334,34 @@ void
CodeEmitterGM107::emitATOM()
{
unsigned dType, subOp;
- switch (insn->dType) {
- case TYPE_U32: dType = 0; break;
- case TYPE_S32: dType = 1; break;
- case TYPE_U64: dType = 2; break;
- case TYPE_F32: dType = 3; break;
- case TYPE_B128: dType = 4; break;
- case TYPE_S64: dType = 5; break;
- default: assert(!"unexpected dType"); dType = 0; break;
- }
- if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
- subOp = 8;
- else
- subOp = insn->subOp;
- assert(insn->subOp != NV50_IR_SUBOP_ATOM_CAS); /* XXX */
- emitInsn (0xed000000);
+ if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
+ switch (insn->dType) {
+ case TYPE_U32: dType = 0; break;
+ case TYPE_U64: dType = 1; break;
+ default: assert(!"unexpected dType"); dType = 0; break;
+ }
+ subOp = 15;
+
+ emitInsn (0xee000000);
+ } else {
+ switch (insn->dType) {
+ case TYPE_U32: dType = 0; break;
+ case TYPE_S32: dType = 1; break;
+ case TYPE_U64: dType = 2; break;
+ case TYPE_F32: dType = 3; break;
+ case TYPE_B128: dType = 4; break;
+ case TYPE_S64: dType = 5; break;
+ default: assert(!"unexpected dType"); dType = 0; break;
+ }
+ if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
+ subOp = 8;
+ else
+ subOp = insn->subOp;
+
+ emitInsn (0xed000000);
+ }
+
emitField(0x34, 4, subOp);
emitField(0x31, 3, dType);
emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
@@ -2627,6 +2646,13 @@ CodeEmitterGM107::emitOUT()
emitGPR (0x00, insn->def(0));
}
+void
+CodeEmitterGM107::emitMEMBAR()
+{
+ emitInsn (0xef980000);
+ emitField(0x08, 2, insn->subOp >> 2);
+}
+
/*******************************************************************************
* assembler front-end
******************************************************************************/
@@ -2926,6 +2952,9 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
case OP_RESTART:
emitOUT();
break;
+ case OP_MEMBAR:
+ emitMEMBAR();
+ break;
default:
assert(!"invalid opcode");
emitNOP();
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
index bc8354deba1..682a19d6d78 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
@@ -527,7 +527,8 @@ CodeEmitterNV50::emitForm_ADD(const Instruction *i)
setSrcFileBits(i, NV50_OP_ENC_LONG_ALT);
setSrc(i, 0, 0);
- setSrc(i, 1, 2);
+ if (i->predSrc != 1)
+ setSrc(i, 1, 2);
if (i->getIndirect(0, 0)) {
assert(!i->getIndirect(1, 0));
@@ -840,7 +841,7 @@ CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp)
emitForm_ADD(i);
- if (!i->srcExists(1))
+ if (!i->srcExists(1) || i->predSrc == 1)
srcId(i->src(0), 32 + 14);
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index 8637db91521..0068da5cbb7 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -398,6 +398,11 @@ CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc)
srcId(i->src(s), s ? ((s == 2) ? 49 : s1) : 20);
break;
default:
+ if (i->op == OP_SELP) {
+ // OP_SELP is used to implement shared+atomics on Fermi.
+ assert(s == 2 && i->src(s).getFile() == FILE_PREDICATE);
+ srcId(i->src(s), 49);
+ }
// ignore here, can be predicate or flags, but must not be address
break;
}
@@ -1174,7 +1179,7 @@ void CodeEmitterNVC0::emitSELP(const Instruction *i)
{
emitForm_A(i, HEX64(20000000, 00000004));
- if (i->cc == CC_NOT_P || i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
+ if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
code[1] |= 1 << 20;
}
@@ -1334,7 +1339,7 @@ CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
defId(i->def(0), 14);
srcId(i->src(0), 20);
- srcId(i->srcExists(1) ? i->src(1) : i->src(0), 26);
+ srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 26);
if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
code[0] |= 1 << 9; // dall
@@ -1773,7 +1778,16 @@ CodeEmitterNVC0::emitSTORE(const Instruction *i)
switch (i->src(0).getFile()) {
case FILE_MEMORY_GLOBAL: opc = 0x90000000; break;
case FILE_MEMORY_LOCAL: opc = 0xc8000000; break;
- case FILE_MEMORY_SHARED: opc = 0xc9000000; break;
+ case FILE_MEMORY_SHARED:
+ if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
+ if (targ->getChipset() >= NVISA_GK104_CHIPSET)
+ opc = 0xb8000000;
+ else
+ opc = 0xcc000000;
+ } else {
+ opc = 0xc9000000;
+ }
+ break;
default:
assert(!"invalid memory file");
opc = 0;
@@ -1782,6 +1796,15 @@ CodeEmitterNVC0::emitSTORE(const Instruction *i)
code[0] = 0x00000005;
code[1] = opc;
+ if (targ->getChipset() >= NVISA_GK104_CHIPSET) {
+ // Unlocked store on shared memory can fail.
+ if (i->src(0).getFile() == FILE_MEMORY_SHARED &&
+ i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
+ assert(i->defExists(0));
+ defId(i->def(0), 8);
+ }
+ }
+
setAddressByFile(i->src(0));
srcId(i->src(1), 14);
srcId(i->src(0).getIndirect(0), 20);
@@ -1804,7 +1827,16 @@ CodeEmitterNVC0::emitLOAD(const Instruction *i)
switch (i->src(0).getFile()) {
case FILE_MEMORY_GLOBAL: opc = 0x80000000; break;
case FILE_MEMORY_LOCAL: opc = 0xc0000000; break;
- case FILE_MEMORY_SHARED: opc = 0xc1000000; break;
+ case FILE_MEMORY_SHARED:
+ if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
+ if (targ->getChipset() >= NVISA_GK104_CHIPSET)
+ opc = 0xa8000000;
+ else
+ opc = 0xc4000000;
+ } else {
+ opc = 0xc1000000;
+ }
+ break;
case FILE_MEMORY_CONST:
if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
emitMOV(i); // not sure if this is any better
@@ -1820,6 +1852,13 @@ CodeEmitterNVC0::emitLOAD(const Instruction *i)
}
code[1] = opc;
+ if (i->src(0).getFile() == FILE_MEMORY_SHARED) {
+ if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
+ assert(i->defExists(1));
+ defId(i->def(1), 32 + 18);
+ }
+ }
+
defId(i->def(0), 14);
setAddressByFile(i->src(0));
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 52ac198221d..d06e9efa463 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -374,6 +374,7 @@ static nv50_ir::DataFile translateFile(uint file)
case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE;
case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE;
case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_GLOBAL;
+ case TGSI_FILE_MEMORY: return nv50_ir::FILE_MEMORY_GLOBAL;
case TGSI_FILE_SAMPLER:
case TGSI_FILE_NULL:
default:
@@ -858,6 +859,11 @@ public:
};
std::vector<Resource> resources;
+ struct MemoryFile {
+ bool shared;
+ };
+ std::vector<MemoryFile> memoryFiles;
+
private:
int inferSysValDirection(unsigned sn) const;
bool scanDeclaration(const struct tgsi_full_declaration *);
@@ -904,6 +910,7 @@ bool Source::scanSource()
textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1);
//resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1);
tempArrayId.resize(scan.file_max[TGSI_FILE_TEMPORARY] + 1);
+ memoryFiles.resize(scan.file_max[TGSI_FILE_MEMORY] + 1);
info->immd.bufSize = 0;
@@ -1213,6 +1220,11 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
for (i = first; i <= last; ++i)
textureViews[i].target = decl->SamplerView.Resource;
break;
+ case TGSI_FILE_MEMORY:
+ for (i = first; i <= last; ++i)
+ memoryFiles[i].shared = decl->Declaration.Shared;
+ break;
+ case TGSI_FILE_NULL:
case TGSI_FILE_TEMPORARY:
for (i = first; i <= last; ++i)
tempArrayId[i] = arrayId;
@@ -1220,7 +1232,6 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
tempArrayInfo.insert(std::make_pair(arrayId, std::make_pair(
first, last - first + 1)));
break;
- case TGSI_FILE_NULL:
case TGSI_FILE_ADDRESS:
case TGSI_FILE_CONSTANT:
case TGSI_FILE_IMMEDIATE:
@@ -1516,6 +1527,9 @@ Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address)
sym->reg.fileIndex = fileIdx;
+ if (tgsiFile == TGSI_FILE_MEMORY && code->memoryFiles[fileIdx].shared)
+ sym->setFile(FILE_MEMORY_SHARED);
+
if (idx >= 0) {
if (sym->reg.file == FILE_SHADER_INPUT)
sym->setOffset(info->in[idx].slot[c] * 4);
@@ -1769,7 +1783,7 @@ Converter::acquireDst(int d, int c)
int idx = dst.getIndex(0);
int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
- if (dst.isMasked(c) || f == TGSI_FILE_BUFFER)
+ if (dst.isMasked(c) || f == TGSI_FILE_BUFFER || f == TGSI_FILE_MEMORY)
return NULL;
if (dst.isIndirect(0) ||
@@ -2239,7 +2253,8 @@ Converter::handleLOAD(Value *dst0[4])
int c;
std::vector<Value *> off, src, ldv, def;
- if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) {
+ if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER ||
+ tgsi.getSrc(0).getFile() == TGSI_FILE_MEMORY) {
for (c = 0; c < 4; ++c) {
if (!dst0[c])
continue;
@@ -2248,9 +2263,10 @@ Converter::handleLOAD(Value *dst0[4])
Symbol *sym;
if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) {
off = NULL;
- sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, tgsi.getSrc(1).getValueU32(0, info) + 4 * c);
+ sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,
+ tgsi.getSrc(1).getValueU32(0, info) + 4 * c);
} else {
- sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c);
+ sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, 4 * c);
}
Instruction *ld = mkLoad(TYPE_U32, dst0[c], sym, off);
@@ -2337,7 +2353,8 @@ Converter::handleSTORE()
int c;
std::vector<Value *> off, src, dummy;
- if (tgsi.getDst(0).getFile() == TGSI_FILE_BUFFER) {
+ if (tgsi.getDst(0).getFile() == TGSI_FILE_BUFFER ||
+ tgsi.getDst(0).getFile() == TGSI_FILE_MEMORY) {
for (c = 0; c < 4; ++c) {
if (!(tgsi.getDst(0).getMask() & (1 << c)))
continue;
@@ -2346,11 +2363,11 @@ Converter::handleSTORE()
Value *off;
if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMMEDIATE) {
off = NULL;
- sym = makeSym(TGSI_FILE_BUFFER, r, -1, c,
+ sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c,
tgsi.getSrc(0).getValueU32(0, info) + 4 * c);
} else {
off = fetchSrc(0, 0);
- sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c);
+ sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c, 4 * c);
}
Instruction *st = mkStore(OP_STORE, TYPE_U32, sym, off, fetchSrc(1, c));
@@ -2422,7 +2439,8 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
std::vector<Value *> defv;
LValue *dst = getScratch();
- if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) {
+ if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER ||
+ tgsi.getSrc(0).getFile() == TGSI_FILE_MEMORY) {
for (int c = 0; c < 4; ++c) {
if (!dst0[c])
continue;
@@ -2431,9 +2449,10 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
Value *off = fetchSrc(1, c), *off2 = NULL;
Value *sym;
if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE)
- sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, tgsi.getSrc(1).getValueU32(c, info));
+ sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,
+ tgsi.getSrc(1).getValueU32(c, info));
else
- sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 0);
+ sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, 0);
if (tgsi.getSrc(0).isIndirect(0))
off2 = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
if (subOp == NV50_IR_SUBOP_ATOM_CAS)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index e7cb54bc426..d181f1574f1 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1033,6 +1033,100 @@ NVC0LoweringPass::handleSUQ(Instruction *suq)
return true;
}
+void
+NVC0LoweringPass::handleSharedATOM(Instruction *atom)
+{
+ assert(atom->src(0).getFile() == FILE_MEMORY_SHARED);
+
+ BasicBlock *currBB = atom->bb;
+ BasicBlock *tryLockAndSetBB = atom->bb->splitBefore(atom, false);
+ BasicBlock *joinBB = atom->bb->splitAfter(atom);
+
+ bld.setPosition(currBB, true);
+ assert(!currBB->joinAt);
+ currBB->joinAt = bld.mkFlow(OP_JOINAT, joinBB, CC_ALWAYS, NULL);
+
+ bld.mkFlow(OP_BRA, tryLockAndSetBB, CC_ALWAYS, NULL);
+ currBB->cfg.attach(&tryLockAndSetBB->cfg, Graph::Edge::TREE);
+
+ bld.setPosition(tryLockAndSetBB, true);
+
+ Instruction *ld =
+ bld.mkLoad(TYPE_U32, atom->getDef(0),
+ bld.mkSymbol(FILE_MEMORY_SHARED, 0, TYPE_U32, 0), NULL);
+ ld->setDef(1, bld.getSSA(1, FILE_PREDICATE));
+ ld->subOp = NV50_IR_SUBOP_LOAD_LOCKED;
+
+ Value *stVal;
+ if (atom->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
+ // Read the old value, and write the new one.
+ stVal = atom->getSrc(1);
+ } else if (atom->subOp == NV50_IR_SUBOP_ATOM_CAS) {
+ CmpInstruction *set =
+ bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
+ TYPE_U32, ld->getDef(0), atom->getSrc(1));
+ set->setPredicate(CC_P, ld->getDef(1));
+
+ Instruction *selp =
+ bld.mkOp3(OP_SELP, TYPE_U32, bld.getSSA(), ld->getDef(0),
+ atom->getSrc(2), set->getDef(0));
+ selp->src(2).mod = Modifier(NV50_IR_MOD_NOT);
+ selp->setPredicate(CC_P, ld->getDef(1));
+
+ stVal = selp->getDef(0);
+ } else {
+ operation op;
+
+ switch (atom->subOp) {
+ case NV50_IR_SUBOP_ATOM_ADD:
+ op = OP_ADD;
+ break;
+ case NV50_IR_SUBOP_ATOM_AND:
+ op = OP_AND;
+ break;
+ case NV50_IR_SUBOP_ATOM_OR:
+ op = OP_OR;
+ break;
+ case NV50_IR_SUBOP_ATOM_XOR:
+ op = OP_XOR;
+ break;
+ case NV50_IR_SUBOP_ATOM_MIN:
+ op = OP_MIN;
+ break;
+ case NV50_IR_SUBOP_ATOM_MAX:
+ op = OP_MAX;
+ break;
+ default:
+ assert(0);
+ }
+
+ Instruction *i =
+ bld.mkOp2(op, atom->dType, bld.getSSA(), ld->getDef(0),
+ atom->getSrc(1));
+ i->setPredicate(CC_P, ld->getDef(1));
+
+ stVal = i->getDef(0);
+ }
+
+ Instruction *st =
+ bld.mkStore(OP_STORE, TYPE_U32,
+ bld.mkSymbol(FILE_MEMORY_SHARED, 0, TYPE_U32, 0),
+ NULL, stVal);
+ st->setPredicate(CC_P, ld->getDef(1));
+ st->subOp = NV50_IR_SUBOP_STORE_UNLOCKED;
+
+ // Loop until the lock is acquired.
+ bld.mkFlow(OP_BRA, tryLockAndSetBB, CC_NOT_P, ld->getDef(1));
+ tryLockAndSetBB->cfg.attach(&tryLockAndSetBB->cfg, Graph::Edge::BACK);
+ tryLockAndSetBB->cfg.attach(&joinBB->cfg, Graph::Edge::CROSS);
+ bld.mkFlow(OP_BRA, joinBB, CC_ALWAYS, NULL);
+
+ bld.remove(atom);
+
+ bld.setPosition(joinBB, false);
+ bld.mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
+}
+
bool
NVC0LoweringPass::handleATOM(Instruction *atom)
{
@@ -1044,8 +1138,8 @@ NVC0LoweringPass::handleATOM(Instruction *atom)
sv = SV_LBASE;
break;
case FILE_MEMORY_SHARED:
- sv = SV_SBASE;
- break;
+ handleSharedATOM(atom);
+ return true;
default:
assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL);
base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16);
@@ -1072,6 +1166,11 @@ NVC0LoweringPass::handleATOM(Instruction *atom)
bool
NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl)
{
+ if (cas->src(0).getFile() == FILE_MEMORY_SHARED) {
+ // ATOM_CAS and ATOM_EXCH are handled in handleSharedATOM().
+ return false;
+ }
+
if (cas->subOp != NV50_IR_SUBOP_ATOM_CAS &&
cas->subOp != NV50_IR_SUBOP_ATOM_EXCH)
return false;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index 09ec7e69ddc..6eb8aff3036 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -105,6 +105,7 @@ protected:
bool handleATOM(Instruction *);
bool handleCasExch(Instruction *, bool needCctl);
void handleSurfaceOpNVE4(TexInstruction *);
+ void handleSharedATOM(Instruction *);
void checkPredicate(Instruction *);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 05b8db4a3d8..6192c0665e4 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -1539,6 +1539,7 @@ private:
void handleCVT_CVT(Instruction *);
void handleCVT_EXTBF(Instruction *);
void handleSUCLAMP(Instruction *);
+ void handleNEG(Instruction *);
BuildUtil bld;
};
@@ -1634,6 +1635,9 @@ AlgebraicOpt::tryADDToMADOrSAD(Instruction *add, operation toOp)
if (src->getUniqueInsn() && src->getUniqueInsn()->bb != add->bb)
return false;
+ if (src->getInsn()->saturate)
+ return false;
+
if (src->getInsn()->postFactor)
return false;
if (toOp == OP_SAD) {
@@ -2011,6 +2015,34 @@ AlgebraicOpt::handleSUCLAMP(Instruction *insn)
insn->setSrc(0, add->getSrc(s));
}
+// NEG(AND(SET, 1)) -> SET
+void
+AlgebraicOpt::handleNEG(Instruction *i) {
+ Instruction *src = i->getSrc(0)->getInsn();
+ ImmediateValue imm;
+ int b;
+
+ if (isFloatType(i->sType) || !src || src->op != OP_AND)
+ return;
+
+ if (src->src(0).getImmediate(imm))
+ b = 1;
+ else if (src->src(1).getImmediate(imm))
+ b = 0;
+ else
+ return;
+
+ if (!imm.isInteger(1))
+ return;
+
+ Instruction *set = src->getSrc(b)->getInsn();
+ if ((set->op == OP_SET || set->op == OP_SET_AND ||
+ set->op == OP_SET_OR || set->op == OP_SET_XOR) &&
+ !isFloatType(set->dType)) {
+ i->def(0).replace(set->getDef(0), false);
+ }
+}
+
bool
AlgebraicOpt::visit(BasicBlock *bb)
{
@@ -2048,6 +2080,9 @@ AlgebraicOpt::visit(BasicBlock *bb)
case OP_SUCLAMP:
handleSUCLAMP(i);
break;
+ case OP_NEG:
+ handleNEG(i);
+ break;
default:
break;
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index 47285a25c33..85f77047c5c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -198,6 +198,11 @@ static const char *atomSubOpStr[] =
"add", "min", "max", "inc", "dec", "and", "or", "xor", "cas", "exch"
};
+static const char *ldstSubOpStr[] =
+{
+ "", "lock", "unlock"
+};
+
static const char *DataTypeStr[] =
{
"-",
@@ -537,6 +542,11 @@ void Instruction::print() const
if (subOp < Elements(atomSubOpStr))
PRINT("%s ", atomSubOpStr[subOp]);
break;
+ case OP_LOAD:
+ case OP_STORE:
+ if (subOp < Elements(ldstSubOpStr))
+ PRINT("%s ", ldstSubOpStr[subOp]);
+ break;
default:
if (subOp)
PRINT("(SUBOP:%u) ", subOp);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index de39be872e4..d877c253a17 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -968,6 +968,7 @@ GCRA::coalesce(ArrayList& insns)
case 0xf0:
case 0x100:
case 0x110:
+ case 0x120:
ret = doCoalesce(insns, JOIN_MASK_UNION);
break;
default:
@@ -2231,6 +2232,7 @@ RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb)
texConstraintNVE0(tex);
break;
case 0x110:
+ case 0x120:
texConstraintGM107(tex);
break;
default:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
index ae0a8bb61d1..89d3a08937f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
@@ -143,6 +143,7 @@ Target *Target::create(unsigned int chipset)
STATIC_ASSERT(Elements(operationClass) == OP_LAST + 1);
switch (chipset & ~0xf) {
case 0x110:
+ case 0x120:
return getTargetGM107(chipset);
case 0xc0:
case 0xd0:
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index b62889119c5..5be7a3dab76 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -280,6 +280,7 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
@@ -324,6 +325,7 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
diff --git a/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h b/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h
new file mode 100644
index 00000000000..5d40624bb9e
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h
@@ -0,0 +1,279 @@
+#ifndef G80_DEFS_XML
+#define G80_DEFS_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://github.com/envytools/envytools/
+git clone https://github.com/envytools/envytools.git
+
+The rules-ng-ng source files this header was generated from are:
+- /home/skeggsb/git/envytools/rnndb/../rnndb/graph/g80_texture.xml ( 18837 bytes, from 2016-01-14 23:54:22)
+- /home/skeggsb/git/envytools/rnndb/copyright.xml ( 6456 bytes, from 2015-09-10 02:57:40)
+- /home/skeggsb/git/envytools/rnndb/nvchipsets.xml ( 2908 bytes, from 2016-02-02 23:45:00)
+- /home/skeggsb/git/envytools/rnndb/g80_defs.xml ( 21739 bytes, from 2016-02-04 00:29:42)
+- /home/skeggsb/git/envytools/rnndb/nv_defs.xml ( 5388 bytes, from 2016-01-14 23:54:22)
+
+Copyright (C) 2006-2016 by the following authors:
+- Artur Huillet <[email protected]> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <[email protected]> (koala_br)
+- Carlos Martin <[email protected]> (carlosmn)
+- Christoph Bumiller <[email protected]> (calim, chrisbmr)
+- Dawid Gajownik <[email protected]> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <[email protected]> (lumag)
+- EdB <[email protected]> (edb_)
+- Erik Waling <[email protected]> (erikwaling)
+- Francisco Jerez <[email protected]> (curro)
+- Ilia Mirkin <[email protected]> (imirkin)
+- jb17bsome <[email protected]> (jb17bsome)
+- Jeremy Kolb <[email protected]> (kjeremy)
+- Laurent Carlier <[email protected]> (lordheavy)
+- Luca Barbieri <[email protected]> (lb, lb1)
+- Maarten Maathuis <[email protected]> (stillunknown)
+- Marcin Kościelnicki <[email protected]> (mwk, koriakin)
+- Mark Carey <[email protected]> (careym)
+- Matthieu Castet <[email protected]> (mat-c)
+- nvidiaman <[email protected]> (nvidiaman)
+- Patrice Mandin <[email protected]> (pmandin, pmdata)
+- Pekka Paalanen <[email protected]> (pq, ppaalanen)
+- Peter Popov <[email protected]> (ironpeter)
+- Richard Hughes <[email protected]> (hughsient)
+- Rudi Cilibrasi <[email protected]> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <[email protected]> (leroutier)
+- Stephane Marchesin <[email protected]> (marcheu)
+- sturmflut <[email protected]> (sturmflut)
+- Sylvain Munaut <[email protected]>
+- Victor Stinner <[email protected]> (haypo)
+- Wladmir van der Laan <[email protected]> (miathan6)
+- Younes Manton <[email protected]> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+#define G80_VSTATUS_IDLE 0x00000000
+#define G80_VSTATUS_BUSY 0x00000001
+#define G80_VSTATUS_UNK2 0x00000002
+#define G80_VSTATUS_WAITING 0x00000003
+#define G80_VSTATUS_BLOCKED 0x00000005
+#define G80_VSTATUS_FAULTED 0x00000006
+#define G80_VSTATUS_PAUSED 0x00000007
+#define G80_TIC_SOURCE_ZERO 0x00000000
+#define G80_TIC_SOURCE_R 0x00000002
+#define G80_TIC_SOURCE_G 0x00000003
+#define G80_TIC_SOURCE_B 0x00000004
+#define G80_TIC_SOURCE_A 0x00000005
+#define G80_TIC_SOURCE_ONE_INT 0x00000006
+#define G80_TIC_SOURCE_ONE_FLOAT 0x00000007
+#define G80_TIC_TYPE_SNORM 0x00000001
+#define G80_TIC_TYPE_UNORM 0x00000002
+#define G80_TIC_TYPE_SINT 0x00000003
+#define G80_TIC_TYPE_UINT 0x00000004
+#define G80_TIC_TYPE_SNORM_FORCE_FP16 0x00000005
+#define G80_TIC_TYPE_UNORM_FORCE_FP16 0x00000006
+#define G80_TIC_TYPE_FLOAT 0x00000007
+#define G80_SURFACE_FORMAT_BITMAP 0x0000001c
+#define G80_SURFACE_FORMAT_UNK1D 0x0000001d
+#define G80_SURFACE_FORMAT_RGBA32_FLOAT 0x000000c0
+#define G80_SURFACE_FORMAT_RGBA32_SINT 0x000000c1
+#define G80_SURFACE_FORMAT_RGBA32_UINT 0x000000c2
+#define G80_SURFACE_FORMAT_RGBX32_FLOAT 0x000000c3
+#define G80_SURFACE_FORMAT_RGBX32_SINT 0x000000c4
+#define G80_SURFACE_FORMAT_RGBX32_UINT 0x000000c5
+#define G80_SURFACE_FORMAT_RGBA16_UNORM 0x000000c6
+#define G80_SURFACE_FORMAT_RGBA16_SNORM 0x000000c7
+#define G80_SURFACE_FORMAT_RGBA16_SINT 0x000000c8
+#define G80_SURFACE_FORMAT_RGBA16_UINT 0x000000c9
+#define G80_SURFACE_FORMAT_RGBA16_FLOAT 0x000000ca
+#define G80_SURFACE_FORMAT_RG32_FLOAT 0x000000cb
+#define G80_SURFACE_FORMAT_RG32_SINT 0x000000cc
+#define G80_SURFACE_FORMAT_RG32_UINT 0x000000cd
+#define G80_SURFACE_FORMAT_RGBX16_FLOAT 0x000000ce
+#define G80_SURFACE_FORMAT_BGRA8_UNORM 0x000000cf
+#define G80_SURFACE_FORMAT_BGRA8_SRGB 0x000000d0
+#define G80_SURFACE_FORMAT_RGB10_A2_UNORM 0x000000d1
+#define G80_SURFACE_FORMAT_RGB10_A2_UINT 0x000000d2
+#define G80_SURFACE_FORMAT_RGBA8_UNORM 0x000000d5
+#define G80_SURFACE_FORMAT_RGBA8_SRGB 0x000000d6
+#define G80_SURFACE_FORMAT_RGBA8_SNORM 0x000000d7
+#define G80_SURFACE_FORMAT_RGBA8_SINT 0x000000d8
+#define G80_SURFACE_FORMAT_RGBA8_UINT 0x000000d9
+#define G80_SURFACE_FORMAT_RG16_UNORM 0x000000da
+#define G80_SURFACE_FORMAT_RG16_SNORM 0x000000db
+#define G80_SURFACE_FORMAT_RG16_SINT 0x000000dc
+#define G80_SURFACE_FORMAT_RG16_UINT 0x000000dd
+#define G80_SURFACE_FORMAT_RG16_FLOAT 0x000000de
+#define G80_SURFACE_FORMAT_BGR10_A2_UNORM 0x000000df
+#define G80_SURFACE_FORMAT_R11G11B10_FLOAT 0x000000e0
+#define G80_SURFACE_FORMAT_R32_SINT 0x000000e3
+#define G80_SURFACE_FORMAT_R32_UINT 0x000000e4
+#define G80_SURFACE_FORMAT_R32_FLOAT 0x000000e5
+#define G80_SURFACE_FORMAT_BGRX8_UNORM 0x000000e6
+#define G80_SURFACE_FORMAT_BGRX8_SRGB 0x000000e7
+#define G80_SURFACE_FORMAT_B5G6R5_UNORM 0x000000e8
+#define G80_SURFACE_FORMAT_BGR5_A1_UNORM 0x000000e9
+#define G80_SURFACE_FORMAT_RG8_UNORM 0x000000ea
+#define G80_SURFACE_FORMAT_RG8_SNORM 0x000000eb
+#define G80_SURFACE_FORMAT_RG8_SINT 0x000000ec
+#define G80_SURFACE_FORMAT_RG8_UINT 0x000000ed
+#define G80_SURFACE_FORMAT_R16_UNORM 0x000000ee
+#define G80_SURFACE_FORMAT_R16_SNORM 0x000000ef
+#define G80_SURFACE_FORMAT_R16_SINT 0x000000f0
+#define G80_SURFACE_FORMAT_R16_UINT 0x000000f1
+#define G80_SURFACE_FORMAT_R16_FLOAT 0x000000f2
+#define G80_SURFACE_FORMAT_R8_UNORM 0x000000f3
+#define G80_SURFACE_FORMAT_R8_SNORM 0x000000f4
+#define G80_SURFACE_FORMAT_R8_SINT 0x000000f5
+#define G80_SURFACE_FORMAT_R8_UINT 0x000000f6
+#define G80_SURFACE_FORMAT_A8_UNORM 0x000000f7
+#define G80_SURFACE_FORMAT_BGR5_X1_UNORM 0x000000f8
+#define G80_SURFACE_FORMAT_RGBX8_UNORM 0x000000f9
+#define G80_SURFACE_FORMAT_RGBX8_SRGB 0x000000fa
+#define G80_SURFACE_FORMAT_BGR5_X1_UNORM_UNKFB 0x000000fb
+#define G80_SURFACE_FORMAT_BGR5_X1_UNORM_UNKFC 0x000000fc
+#define G80_SURFACE_FORMAT_BGRX8_UNORM_UNKFD 0x000000fd
+#define G80_SURFACE_FORMAT_BGRX8_UNORM_UNKFE 0x000000fe
+#define G80_SURFACE_FORMAT_Y32_UINT_UNKFF 0x000000ff
+#define G80_ZETA_FORMAT_Z32_FLOAT 0x0000000a
+#define G80_ZETA_FORMAT_Z16_UNORM 0x00000013
+#define G80_ZETA_FORMAT_S8_Z24_UNORM 0x00000014
+#define G80_ZETA_FORMAT_Z24_X8_UNORM 0x00000015
+#define G80_ZETA_FORMAT_Z24_S8_UNORM 0x00000016
+#define G80_ZETA_FORMAT_Z24_C8_UNORM 0x00000018
+#define G80_ZETA_FORMAT_Z32_S8_X24_FLOAT 0x00000019
+#define G80_ZETA_FORMAT_Z24_X8_S8_C8_X16_UNORM 0x0000001d
+#define G80_ZETA_FORMAT_Z32_X8_C8_X16_FLOAT 0x0000001e
+#define G80_ZETA_FORMAT_Z32_S8_C8_X16_FLOAT 0x0000001f
+#define GK104_IMAGE_FORMAT_RGBA32_FLOAT 0x00000002
+#define GK104_IMAGE_FORMAT_RGBA32_SINT 0x00000003
+#define GK104_IMAGE_FORMAT_RGBA32_UINT 0x00000004
+#define GK104_IMAGE_FORMAT_RGBA16_UNORM 0x00000008
+#define GK104_IMAGE_FORMAT_RGBA16_SNORM 0x00000009
+#define GK104_IMAGE_FORMAT_RGBA16_SINT 0x0000000a
+#define GK104_IMAGE_FORMAT_RGBA16_UINT 0x0000000b
+#define GK104_IMAGE_FORMAT_RGBA16_FLOAT 0x0000000c
+#define GK104_IMAGE_FORMAT_RG32_FLOAT 0x0000000d
+#define GK104_IMAGE_FORMAT_RG32_SINT 0x0000000e
+#define GK104_IMAGE_FORMAT_RG32_UINT 0x0000000f
+#define GK104_IMAGE_FORMAT_RGB10_A2_UNORM 0x00000013
+#define GK104_IMAGE_FORMAT_RGB10_A2_UINT 0x00000015
+#define GK104_IMAGE_FORMAT_RGBA8_UNORM 0x00000018
+#define GK104_IMAGE_FORMAT_RGBA8_SNORM 0x0000001a
+#define GK104_IMAGE_FORMAT_RGBA8_SINT 0x0000001b
+#define GK104_IMAGE_FORMAT_RGBA8_UINT 0x0000001c
+#define GK104_IMAGE_FORMAT_RG16_UNORM 0x0000001d
+#define GK104_IMAGE_FORMAT_RG16_SNORM 0x0000001e
+#define GK104_IMAGE_FORMAT_RG16_SINT 0x0000001f
+#define GK104_IMAGE_FORMAT_RG16_UINT 0x00000020
+#define GK104_IMAGE_FORMAT_RG16_FLOAT 0x00000021
+#define GK104_IMAGE_FORMAT_R11G11B10_FLOAT 0x00000024
+#define GK104_IMAGE_FORMAT_R32_SINT 0x00000027
+#define GK104_IMAGE_FORMAT_R32_UINT 0x00000028
+#define GK104_IMAGE_FORMAT_R32_FLOAT 0x00000029
+#define GK104_IMAGE_FORMAT_RG8_UNORM 0x0000002e
+#define GK104_IMAGE_FORMAT_RG8_SNORM 0x0000002f
+#define GK104_IMAGE_FORMAT_RG8_SINT 0x00000030
+#define GK104_IMAGE_FORMAT_RG8_UINT 0x00000031
+#define GK104_IMAGE_FORMAT_R16_UNORM 0x00000032
+#define GK104_IMAGE_FORMAT_R16_SNORM 0x00000033
+#define GK104_IMAGE_FORMAT_R16_SINT 0x00000034
+#define GK104_IMAGE_FORMAT_R16_UINT 0x00000035
+#define GK104_IMAGE_FORMAT_R16_FLOAT 0x00000036
+#define GK104_IMAGE_FORMAT_R8_UNORM 0x00000037
+#define GK104_IMAGE_FORMAT_R8_SNORM 0x00000038
+#define GK104_IMAGE_FORMAT_R8_SINT 0x00000039
+#define GK104_IMAGE_FORMAT_R8_UINT 0x0000003a
+#define G80_PGRAPH_DATA_ERROR_INVALID_OPERATION 0x00000003
+#define G80_PGRAPH_DATA_ERROR_INVALID_VALUE 0x00000004
+#define G80_PGRAPH_DATA_ERROR_INVALID_ENUM 0x00000005
+#define G80_PGRAPH_DATA_ERROR_INVALID_OBJECT 0x00000008
+#define G80_PGRAPH_DATA_ERROR_READ_ONLY_OBJECT 0x00000009
+#define G80_PGRAPH_DATA_ERROR_SUPERVISOR_OBJECT 0x0000000a
+#define G80_PGRAPH_DATA_ERROR_INVALID_ADDRESS_ALIGNMENT 0x0000000b
+#define G80_PGRAPH_DATA_ERROR_INVALID_BITFIELD 0x0000000c
+#define G80_PGRAPH_DATA_ERROR_BEGIN_END_ACTIVE 0x0000000d
+#define G80_PGRAPH_DATA_ERROR_SEMANTIC_COLOR_BACK_OVER_LIMIT 0x0000000e
+#define G80_PGRAPH_DATA_ERROR_VIEWPORT_ID_NEEDS_GP 0x0000000f
+#define G80_PGRAPH_DATA_ERROR_RT_DOUBLE_BIND 0x00000010
+#define G80_PGRAPH_DATA_ERROR_RT_TYPES_MISMATCH 0x00000011
+#define G80_PGRAPH_DATA_ERROR_RT_PITCH_WITH_ZETA 0x00000012
+#define G80_PGRAPH_DATA_ERROR_FP_TOO_FEW_REGS 0x00000015
+#define G80_PGRAPH_DATA_ERROR_ZETA_FORMAT_CSAA_MISMATCH 0x00000016
+#define G80_PGRAPH_DATA_ERROR_RT_PITCH_WITH_MSAA 0x00000017
+#define G80_PGRAPH_DATA_ERROR_FP_INTERPOLANT_START_OVER_LIMIT 0x00000018
+#define G80_PGRAPH_DATA_ERROR_SEMANTIC_LAYER_OVER_LIMIT 0x00000019
+#define G80_PGRAPH_DATA_ERROR_RT_INVALID_ALIGNMENT 0x0000001a
+#define G80_PGRAPH_DATA_ERROR_SAMPLER_OVER_LIMIT 0x0000001b
+#define G80_PGRAPH_DATA_ERROR_TEXTURE_OVER_LIMIT 0x0000001c
+#define G80_PGRAPH_DATA_ERROR_GP_TOO_MANY_OUTPUTS 0x0000001e
+#define G80_PGRAPH_DATA_ERROR_RT_BPP128_WITH_MS8 0x0000001f
+#define G80_PGRAPH_DATA_ERROR_Z_OUT_OF_BOUNDS 0x00000021
+#define G80_PGRAPH_DATA_ERROR_XY_OUT_OF_BOUNDS 0x00000023
+#define G80_PGRAPH_DATA_ERROR_VP_ZERO_INPUTS 0x00000024
+#define G80_PGRAPH_DATA_ERROR_CP_MORE_PARAMS_THAN_SHARED 0x00000027
+#define G80_PGRAPH_DATA_ERROR_CP_NO_REG_SPACE_STRIPED 0x00000028
+#define G80_PGRAPH_DATA_ERROR_CP_NO_REG_SPACE_PACKED 0x00000029
+#define G80_PGRAPH_DATA_ERROR_CP_NOT_ENOUGH_WARPS 0x0000002a
+#define G80_PGRAPH_DATA_ERROR_CP_BLOCK_SIZE_MISMATCH 0x0000002b
+#define G80_PGRAPH_DATA_ERROR_CP_NOT_ENOUGH_LOCAL_WARPS 0x0000002c
+#define G80_PGRAPH_DATA_ERROR_CP_NOT_ENOUGH_STACK_WARPS 0x0000002d
+#define G80_PGRAPH_DATA_ERROR_CP_NO_BLOCKDIM_LATCH 0x0000002e
+#define G80_PGRAPH_DATA_ERROR_ENG2D_FORMAT_MISMATCH 0x00000031
+#define G80_PGRAPH_DATA_ERROR_ENG2D_OPERATION_ILLEGAL_FOR_DST_FORMAT 0x00000033
+#define G80_PGRAPH_DATA_ERROR_ENG2D_FORMAT_MISMATCH_B 0x00000034
+#define G80_PGRAPH_DATA_ERROR_PRIMITIVE_ID_NEEDS_GP 0x0000003f
+#define G80_PGRAPH_DATA_ERROR_SEMANTIC_VIEWPORT_OVER_LIMIT 0x00000044
+#define G80_PGRAPH_DATA_ERROR_SEMANTIC_COLOR_FRONT_OVER_LIMIT 0x00000045
+#define G80_PGRAPH_DATA_ERROR_LAYER_ID_NEEDS_GP 0x00000046
+#define G80_PGRAPH_DATA_ERROR_SEMANTIC_CLIP_OVER_LIMIT 0x00000047
+#define G80_PGRAPH_DATA_ERROR_SEMANTIC_PTSZ_OVER_LIMIT 0x00000048
+#define G80_PGRAPH_DATA_ERROR_M2MF_LINE_LENGTH_EXCEEDS_PITCH_IN 0x00000051
+#define G80_PGRAPH_DATA_ERROR_M2MF_LINE_LENGTH_EXCEEDS_PITCH_OUT 0x00000053
+#define G80_PGRAPH_DATA_ERROR_RT_PITCH_WITH_ZETA_GF100 0x00000098
+#define G80_PGRAPH_DATA_ERROR_ENG2D_UNALIGNED_PITCH_GF100 0x000000a5
+#define G80_CG_IDLE_TIMEOUT__MASK 0x0000003f
+#define G80_CG_IDLE_TIMEOUT__SHIFT 0
+#define G80_CG_IDLE_TIMEOUT_ENABLE 0x00000040
+#define G80_CG_INTERFACE_REENABLE_TIME__MASK 0x000f0000
+#define G80_CG_INTERFACE_REENABLE_TIME__SHIFT 16
+#define G80_CG_THROTTLE_DUTY_M1__MASK 0x00f00000
+#define G80_CG_THROTTLE_DUTY_M1__SHIFT 20
+#define G80_CG_DELAY__MASK 0x0f000000
+#define G80_CG_DELAY__SHIFT 24
+#define G80_CG_CLOCK_THROTTLE_ENABLE 0x10000000
+#define G80_CG_THROTTLE_MODE__MASK 0x20000000
+#define G80_CG_THROTTLE_MODE__SHIFT 29
+#define G80_CG_THROTTLE_MODE_AUTO 0x00000000
+#define G80_CG_THROTTLE_MODE_MANUAL 0x20000000
+#define G80_CG_INTERFACE_THROTTLE_ENABLE 0x40000000
+#define G80_QUERY__SIZE 0x00000010
+#define G80_QUERY_COUNTER 0x00000000
+
+#define G80_QUERY_RES 0x00000004
+
+#define G80_QUERY_TIME 0x00000008
+
+
+#endif /* G80_DEFS_XML */
diff --git a/src/gallium/drivers/nouveau/nv50/g80_texture.xml.h b/src/gallium/drivers/nouveau/nv50/g80_texture.xml.h
new file mode 100644
index 00000000000..542963ca452
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/g80_texture.xml.h
@@ -0,0 +1,451 @@
+#ifndef G80_TEXTURE_XML
+#define G80_TEXTURE_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://github.com/envytools/envytools/
+git clone https://github.com/envytools/envytools.git
+
+The rules-ng-ng source files this header was generated from are:
+- /home/skeggsb/git/envytools/rnndb/../rnndb/graph/g80_texture.xml ( 18837 bytes, from 2016-01-14 23:54:22)
+- /home/skeggsb/git/envytools/rnndb/copyright.xml ( 6456 bytes, from 2015-09-10 02:57:40)
+- /home/skeggsb/git/envytools/rnndb/nvchipsets.xml ( 2908 bytes, from 2016-02-02 23:45:00)
+- /home/skeggsb/git/envytools/rnndb/g80_defs.xml ( 21739 bytes, from 2016-02-04 00:29:42)
+- /home/skeggsb/git/envytools/rnndb/nv_defs.xml ( 5388 bytes, from 2016-01-14 23:54:22)
+
+Copyright (C) 2006-2016 by the following authors:
+- Artur Huillet <[email protected]> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <[email protected]> (koala_br)
+- Carlos Martin <[email protected]> (carlosmn)
+- Christoph Bumiller <[email protected]> (calim, chrisbmr)
+- Dawid Gajownik <[email protected]> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <[email protected]> (lumag)
+- EdB <[email protected]> (edb_)
+- Erik Waling <[email protected]> (erikwaling)
+- Francisco Jerez <[email protected]> (curro)
+- Ilia Mirkin <[email protected]> (imirkin)
+- jb17bsome <[email protected]> (jb17bsome)
+- Jeremy Kolb <[email protected]> (kjeremy)
+- Laurent Carlier <[email protected]> (lordheavy)
+- Luca Barbieri <[email protected]> (lb, lb1)
+- Maarten Maathuis <[email protected]> (stillunknown)
+- Marcin Kościelnicki <[email protected]> (mwk, koriakin)
+- Mark Carey <[email protected]> (careym)
+- Matthieu Castet <[email protected]> (mat-c)
+- nvidiaman <[email protected]> (nvidiaman)
+- Patrice Mandin <[email protected]> (pmandin, pmdata)
+- Pekka Paalanen <[email protected]> (pq, ppaalanen)
+- Peter Popov <[email protected]> (ironpeter)
+- Richard Hughes <[email protected]> (hughsient)
+- Rudi Cilibrasi <[email protected]> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <[email protected]> (leroutier)
+- Stephane Marchesin <[email protected]> (marcheu)
+- sturmflut <[email protected]> (sturmflut)
+- Sylvain Munaut <[email protected]>
+- Victor Stinner <[email protected]> (haypo)
+- Wladmir van der Laan <[email protected]> (miathan6)
+- Younes Manton <[email protected]> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+#define G80_TSC_WRAP_WRAP 0x00000000
+#define G80_TSC_WRAP_MIRROR 0x00000001
+#define G80_TSC_WRAP_CLAMP_TO_EDGE 0x00000002
+#define G80_TSC_WRAP_BORDER 0x00000003
+#define G80_TSC_WRAP_CLAMP_OGL 0x00000004
+#define G80_TSC_WRAP_MIRROR_ONCE_CLAMP_TO_EDGE 0x00000005
+#define G80_TSC_WRAP_MIRROR_ONCE_BORDER 0x00000006
+#define G80_TSC_WRAP_MIRROR_ONCE_CLAMP_OGL 0x00000007
+#define G80_TIC__SIZE 0x00000020
+#define G80_TIC_0 0x00000000
+#define GK20A_TIC_0_USE_COMPONENT_SIZES_EXTENDED__MASK 0x80000000
+#define GK20A_TIC_0_USE_COMPONENT_SIZES_EXTENDED__SHIFT 31
+#define GK20A_TIC_0_USE_COMPONENT_SIZES_EXTENDED_NO 0x00000000
+#define GK20A_TIC_0_USE_COMPONENT_SIZES_EXTENDED_YES 0x80000000
+#define G84_TIC_0_PACK_COMPONENTS 0x40000000
+#define G80_TIC_0_W_SOURCE__MASK 0x38000000
+#define G80_TIC_0_W_SOURCE__SHIFT 27
+#define G80_TIC_0_Z_SOURCE__MASK 0x07000000
+#define G80_TIC_0_Z_SOURCE__SHIFT 24
+#define G80_TIC_0_Y_SOURCE__MASK 0x00e00000
+#define G80_TIC_0_Y_SOURCE__SHIFT 21
+#define G80_TIC_0_X_SOURCE__MASK 0x001c0000
+#define G80_TIC_0_X_SOURCE__SHIFT 18
+#define G80_TIC_0_A_DATA_TYPE__MASK 0x00038000
+#define G80_TIC_0_A_DATA_TYPE__SHIFT 15
+#define G80_TIC_0_B_DATA_TYPE__MASK 0x00007000
+#define G80_TIC_0_B_DATA_TYPE__SHIFT 12
+#define G80_TIC_0_G_DATA_TYPE__MASK 0x00000e00
+#define G80_TIC_0_G_DATA_TYPE__SHIFT 9
+#define G80_TIC_0_R_DATA_TYPE__MASK 0x000001c0
+#define G80_TIC_0_R_DATA_TYPE__SHIFT 6
+#define G80_TIC_0_COMPONENTS_SIZES__MASK 0x0000003f
+#define G80_TIC_0_COMPONENTS_SIZES__SHIFT 0
+#define G80_TIC_0_COMPONENTS_SIZES_R32_G32_B32_A32 0x00000001
+#define GF100_TIC_0_COMPONENTS_SIZES_R32_G32_B32 0x00000002
+#define G80_TIC_0_COMPONENTS_SIZES_R16_G16_B16_A16 0x00000003
+#define G80_TIC_0_COMPONENTS_SIZES_R32_G32 0x00000004
+#define G80_TIC_0_COMPONENTS_SIZES_R32_B24G8 0x00000005
+#define G80_TIC_0_COMPONENTS_SIZES_X8B8G8R8 0x00000007
+#define G80_TIC_0_COMPONENTS_SIZES_A8B8G8R8 0x00000008
+#define G80_TIC_0_COMPONENTS_SIZES_A2B10G10R10 0x00000009
+#define G80_TIC_0_COMPONENTS_SIZES_R16_G16 0x0000000c
+#define G80_TIC_0_COMPONENTS_SIZES_G8R24 0x0000000d
+#define G80_TIC_0_COMPONENTS_SIZES_G24R8 0x0000000e
+#define G80_TIC_0_COMPONENTS_SIZES_R32 0x0000000f
+#define G80_TIC_0_COMPONENTS_SIZES_A4B4G4R4 0x00000012
+#define G80_TIC_0_COMPONENTS_SIZES_A5B5G5R1 0x00000013
+#define G80_TIC_0_COMPONENTS_SIZES_A1B5G5R5 0x00000014
+#define G80_TIC_0_COMPONENTS_SIZES_B5G6R5 0x00000015
+#define G80_TIC_0_COMPONENTS_SIZES_B6G5R5 0x00000016
+#define G80_TIC_0_COMPONENTS_SIZES_G8R8 0x00000018
+#define G80_TIC_0_COMPONENTS_SIZES_R16 0x0000001b
+#define G80_TIC_0_COMPONENTS_SIZES_Y8_VIDEO 0x0000001c
+#define G80_TIC_0_COMPONENTS_SIZES_R8 0x0000001d
+#define G80_TIC_0_COMPONENTS_SIZES_G4R4 0x0000001e
+#define G80_TIC_0_COMPONENTS_SIZES_R1 0x0000001f
+#define G80_TIC_0_COMPONENTS_SIZES_E5B9G9R9_SHAREDEXP 0x00000020
+#define G80_TIC_0_COMPONENTS_SIZES_BF10GF11RF11 0x00000021
+#define G80_TIC_0_COMPONENTS_SIZES_G8B8G8R8 0x00000022
+#define G80_TIC_0_COMPONENTS_SIZES_B8G8R8G8 0x00000023
+#define G80_TIC_0_COMPONENTS_SIZES_DXT1 0x00000024
+#define G80_TIC_0_COMPONENTS_SIZES_DXT23 0x00000025
+#define G80_TIC_0_COMPONENTS_SIZES_DXT45 0x00000026
+#define G80_TIC_0_COMPONENTS_SIZES_DXN1 0x00000027
+#define G80_TIC_0_COMPONENTS_SIZES_DXN2 0x00000028
+#define GF100_TIC_0_COMPONENTS_SIZES_BC6H_SF16 0x00000010
+#define GF100_TIC_0_COMPONENTS_SIZES_BC6H_UF16 0x00000011
+#define GF100_TIC_0_COMPONENTS_SIZES_BC7U 0x00000017
+#define GK20A_TIC_0_COMPONENTS_SIZES_ETC2_RGB 0x00000006
+#define GK20A_TIC_0_COMPONENTS_SIZES_ETC2_RGB_PTA 0x0000000a
+#define GK20A_TIC_0_COMPONENTS_SIZES_ETC2_RGBA 0x0000000b
+#define GK20A_TIC_0_COMPONENTS_SIZES_EAC 0x00000019
+#define GK20A_TIC_0_COMPONENTS_SIZES_EACX2 0x0000001a
+#define G80_TIC_0_COMPONENTS_SIZES_Z24S8 0x00000029
+#define G80_TIC_0_COMPONENTS_SIZES_X8Z24 0x0000002a
+#define G80_TIC_0_COMPONENTS_SIZES_S8Z24 0x0000002b
+#define G80_TIC_0_COMPONENTS_SIZES_X4V4Z24__COV4R4V 0x0000002c
+#define G80_TIC_0_COMPONENTS_SIZES_X4V4Z24__COV8R8V 0x0000002d
+#define G80_TIC_0_COMPONENTS_SIZES_V8Z24__COV4R12V 0x0000002e
+#define G80_TIC_0_COMPONENTS_SIZES_ZF32 0x0000002f
+#define G80_TIC_0_COMPONENTS_SIZES_ZF32_X24S8 0x00000030
+#define G80_TIC_0_COMPONENTS_SIZES_X8Z24_X20V4S8__COV4R4V 0x00000031
+#define G80_TIC_0_COMPONENTS_SIZES_X8Z24_X20V4S8__COV8R8V 0x00000032
+#define G80_TIC_0_COMPONENTS_SIZES_ZF32_X20V4X8__COV4R4V 0x00000033
+#define G80_TIC_0_COMPONENTS_SIZES_ZF32_X20V4X8__COV8R8V 0x00000034
+#define G80_TIC_0_COMPONENTS_SIZES_ZF32_X20V4S8__COV4R4V 0x00000035
+#define G80_TIC_0_COMPONENTS_SIZES_ZF32_X20V4S8__COV8R8V 0x00000036
+#define G80_TIC_0_COMPONENTS_SIZES_X8Z24_X16V8S8__COV4R12V 0x00000037
+#define G80_TIC_0_COMPONENTS_SIZES_ZF32_X16V8X8__COV4R12V 0x00000038
+#define G80_TIC_0_COMPONENTS_SIZES_ZF32_X16V8S8__COV4R12V 0x00000039
+#define G200_TIC_0_COMPONENTS_SIZES_Z16 0x0000003a
+#define G200_TIC_0_COMPONENTS_SIZES_V8Z24__COV8R24V 0x0000003b
+#define G200_TIC_0_COMPONENTS_SIZES_X8Z24_X16V8S8__COV8R24V 0x0000003c
+#define G200_TIC_0_COMPONENTS_SIZES_ZF32_X16V8X8__COV8R24V 0x0000003d
+#define G200_TIC_0_COMPONENTS_SIZES_ZF32_X16V8S8__COV8R24V 0x0000003e
+#define G80_TIC_0_COMPONENTS_SIZES__MASK 0x0000003f
+#define G80_TIC_0_COMPONENTS_SIZES__SHIFT 0
+#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_4X4 0x00000000
+#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_5X4 0x00000010
+#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_5X5 0x00000001
+#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_6X5 0x00000011
+#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_6X6 0x00000002
+#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_8X5 0x00000015
+#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_8X6 0x00000012
+#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_8X8 0x00000004
+#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_10X5 0x00000016
+#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_10X6 0x00000017
+#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_10X8 0x00000013
+#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_10X10 0x00000005
+#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_12X10 0x00000014
+#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_12X12 0x00000006
+
+#define G80_TIC_1 0x00000004
+#define G80_TIC_1_OFFSET_LOWER__MASK 0xffffffff
+#define G80_TIC_1_OFFSET_LOWER__SHIFT 0
+
+#define G80_TIC_2 0x00000008
+#define G80_TIC_2_OFFSET_UPPER__MASK 0x000000ff
+#define G80_TIC_2_OFFSET_UPPER__SHIFT 0
+#define G84_TIC_2_ANISO_SPREAD_MAX_LOG2_LSB__MASK 0x00000300
+#define G84_TIC_2_ANISO_SPREAD_MAX_LOG2_LSB__SHIFT 8
+#define G80_TIC_2_SRGB_CONVERSION 0x00000400
+#define G84_TIC_2_ANISO_SPREAD_MAX_LOG2_MSB 0x00000800
+#define G80_TIC_2_LOD_ANISO_QUALITY_2 0x00001000
+#define G80_TIC_2_COLOR_KEY_OP 0x00002000
+#define G80_TIC_2_TEXTURE_TYPE__MASK 0x0003c000
+#define G80_TIC_2_TEXTURE_TYPE__SHIFT 14
+#define G80_TIC_2_TEXTURE_TYPE_ONE_D 0x00000000
+#define G80_TIC_2_TEXTURE_TYPE_TWO_D 0x00004000
+#define G80_TIC_2_TEXTURE_TYPE_THREE_D 0x00008000
+#define G80_TIC_2_TEXTURE_TYPE_CUBEMAP 0x0000c000
+#define G80_TIC_2_TEXTURE_TYPE_ONE_D_ARRAY 0x00010000
+#define G80_TIC_2_TEXTURE_TYPE_TWO_D_ARRAY 0x00014000
+#define G80_TIC_2_TEXTURE_TYPE_ONE_D_BUFFER 0x00018000
+#define G80_TIC_2_TEXTURE_TYPE_TWO_D_NO_MIPMAP 0x0001c000
+#define G80_TIC_2_TEXTURE_TYPE_CUBE_ARRAY 0x00020000
+#define G80_TIC_2_LAYOUT__MASK 0x00040000
+#define G80_TIC_2_LAYOUT__SHIFT 18
+#define G80_TIC_2_LAYOUT_BLOCKLINEAR 0x00000000
+#define G80_TIC_2_LAYOUT_PITCH 0x00040000
+#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH__MASK 0x00380000
+#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH__SHIFT 19
+#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH__MIN 0x00000000
+#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH__MAX 0x00000000
+#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH_ONE 0x00000000
+#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH_TWO 0x00080000
+#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH_FOUR 0x00100000
+#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH_EIGHT 0x00180000
+#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH_SIXTEEN 0x00200000
+#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH_THIRTYTWO 0x00280000
+#define G80_TIC_2_GOBS_PER_BLOCK_HEIGHT__MASK 0x01c00000
+#define G80_TIC_2_GOBS_PER_BLOCK_HEIGHT__SHIFT 22
+#define G80_TIC_2_GOBS_PER_BLOCK_HEIGHT_ONE 0x00000000
+#define G80_TIC_2_GOBS_PER_BLOCK_HEIGHT_TWO 0x00400000
+#define G80_TIC_2_GOBS_PER_BLOCK_HEIGHT_FOUR 0x00800000
+#define G80_TIC_2_GOBS_PER_BLOCK_HEIGHT_EIGHT 0x00c00000
+#define G80_TIC_2_GOBS_PER_BLOCK_HEIGHT_SIXTEEN 0x01000000
+#define G80_TIC_2_GOBS_PER_BLOCK_HEIGHT_THIRTYTWO 0x01400000
+#define G80_TIC_2_GOBS_PER_BLOCK_DEPTH__MASK 0x0e000000
+#define G80_TIC_2_GOBS_PER_BLOCK_DEPTH__SHIFT 25
+#define G80_TIC_2_GOBS_PER_BLOCK_DEPTH_ONE 0x00000000
+#define G80_TIC_2_GOBS_PER_BLOCK_DEPTH_TWO 0x02000000
+#define G80_TIC_2_GOBS_PER_BLOCK_DEPTH_FOUR 0x04000000
+#define G80_TIC_2_GOBS_PER_BLOCK_DEPTH_EIGHT 0x06000000
+#define G80_TIC_2_GOBS_PER_BLOCK_DEPTH_SIXTEEN 0x08000000
+#define G80_TIC_2_GOBS_PER_BLOCK_DEPTH_THIRTYTWO 0x0a000000
+#define G80_TIC_2_SECTOR_PROMOTION__MASK 0x30000000
+#define G80_TIC_2_SECTOR_PROMOTION__SHIFT 28
+#define G80_TIC_2_SECTOR_PROMOTION_NO_PROMOTION 0x00000000
+#define G80_TIC_2_SECTOR_PROMOTION_PROMOTE_TO_2_V 0x10000000
+#define G80_TIC_2_SECTOR_PROMOTION_PROMOTE_TO_2_H 0x20000000
+#define G80_TIC_2_SECTOR_PROMOTION_PROMOTE_TO_4 0x30000000
+#define G80_TIC_2_BORDER_SOURCE__MASK 0x40000000
+#define G80_TIC_2_BORDER_SOURCE__SHIFT 30
+#define G80_TIC_2_BORDER_SOURCE_TEXTURE 0x00000000
+#define G80_TIC_2_BORDER_SOURCE_COLOR 0x40000000
+#define G80_TIC_2_NORMALIZED_COORDS 0x80000000
+
+#define G80_TIC_3 0x0000000c
+#define G80_TIC_3_PITCH__MASK 0x000fffff
+#define G80_TIC_3_PITCH__SHIFT 0
+#define G80_TIC_3_LOD_ANISO_QUALITY__MASK 0x00100000
+#define G80_TIC_3_LOD_ANISO_QUALITY__SHIFT 20
+#define G80_TIC_3_LOD_ANISO_QUALITY_LOW 0x00000000
+#define G80_TIC_3_LOD_ANISO_QUALITY_HIGH 0x00100000
+#define G80_TIC_3_LOD_ISO_QUALITY__MASK 0x00200000
+#define G80_TIC_3_LOD_ISO_QUALITY__SHIFT 21
+#define G80_TIC_3_LOD_ISO_QUALITY_LOW 0x00000000
+#define G80_TIC_3_LOD_ISO_QUALITY_HIGH 0x00200000
+#define G80_TIC_3_ANISO_COARSE_SPREAD_MODIFIER__MASK 0x00c00000
+#define G80_TIC_3_ANISO_COARSE_SPREAD_MODIFIER__SHIFT 22
+#define G80_TIC_3_ANISO_COARSE_SPREAD_MODIFIER_NONE 0x00000000
+#define G80_TIC_3_ANISO_COARSE_SPREAD_MODIFIER_CONST_ONE 0x00400000
+#define G80_TIC_3_ANISO_COARSE_SPREAD_MODIFIER_CONST_TWO 0x00800000
+#define G80_TIC_3_ANISO_COARSE_SPREAD_MODIFIER_SQRT 0x00c00000
+#define G80_TIC_3_ANISO_SPREAD_SCALE__MASK 0x1f000000
+#define G80_TIC_3_ANISO_SPREAD_SCALE__SHIFT 24
+#define G80_TIC_3_USE_HEADER_OPT_CONTROL 0x20000000
+#define G84_TIC_3_ANISO_CLAMP_AT_MAX_LOD 0x40000000
+#define G84_TIC_3_ANISO_POW2 0x80000000
+
+#define G80_TIC_4 0x00000010
+#define G80_TIC_4_WIDTH__MASK 0x3fffffff
+#define G80_TIC_4_WIDTH__SHIFT 0
+#define G80_TIC_4_DEPTH_TEXTURE 0x40000000
+#define G84_TIC_4_USE_TEXTURE_HEADER_V2 0x80000000
+
+#define G80_TIC_5 0x00000014
+#define G80_TIC_5_MAP_MIP_LEVEL__MASK 0xf0000000
+#define G80_TIC_5_MAP_MIP_LEVEL__SHIFT 28
+#define G80_TIC_5_DEPTH__MASK 0x0fff0000
+#define G80_TIC_5_DEPTH__SHIFT 16
+#define G80_TIC_5_HEIGHT__MASK 0x0000ffff
+#define G80_TIC_5_HEIGHT__SHIFT 0
+
+#define G80_TIC_6 0x00000018
+#define G80_TIC_6_TRILIN_OPT__MASK 0x0000001f
+#define G80_TIC_6_TRILIN_OPT__SHIFT 0
+#define G80_TIC_6_MIP_LOD_BIAS__MASK 0x0003ffe0
+#define G80_TIC_6_MIP_LOD_BIAS__SHIFT 5
+#define G80_TIC_6_MIP_LOD_BIAS__RADIX 0x00000008
+#define G80_TIC_6_ANISO_BIAS__MASK 0x00780000
+#define G80_TIC_6_ANISO_BIAS__SHIFT 19
+#define G80_TIC_6_ANISO_BIAS__RADIX 0x00000004
+#define G80_TIC_6_ANISO_FINE_SPREAD_FUNC__MASK 0x01800000
+#define G80_TIC_6_ANISO_FINE_SPREAD_FUNC__SHIFT 23
+#define G80_TIC_6_ANISO_FINE_SPREAD_FUNC_HALF 0x00000000
+#define G80_TIC_6_ANISO_FINE_SPREAD_FUNC_ONE 0x00800000
+#define G80_TIC_6_ANISO_FINE_SPREAD_FUNC_TWO 0x01000000
+#define G80_TIC_6_ANISO_FINE_SPREAD_FUNC_MAX 0x01800000
+#define G80_TIC_6_ANISO_COARSE_SPREAD_FUNC__MASK 0x06000000
+#define G80_TIC_6_ANISO_COARSE_SPREAD_FUNC__SHIFT 25
+#define G80_TIC_6_ANISO_COARSE_SPREAD_FUNC_HALF 0x00000000
+#define G80_TIC_6_ANISO_COARSE_SPREAD_FUNC_ONE 0x02000000
+#define G80_TIC_6_ANISO_COARSE_SPREAD_FUNC_TWO 0x04000000
+#define G80_TIC_6_ANISO_COARSE_SPREAD_FUNC_MAX 0x06000000
+#define G80_TIC_6_MAX_ANISOTROPY__MASK 0x38000000
+#define G80_TIC_6_MAX_ANISOTROPY__SHIFT 27
+#define G80_TIC_6_MAX_ANISOTROPY_1_TO_1 0x00000000
+#define G80_TIC_6_MAX_ANISOTROPY_2_TO_1 0x08000000
+#define G80_TIC_6_MAX_ANISOTROPY_4_TO_1 0x10000000
+#define G80_TIC_6_MAX_ANISOTROPY_6_TO_1 0x18000000
+#define G80_TIC_6_MAX_ANISOTROPY_8_TO_1 0x20000000
+#define G80_TIC_6_MAX_ANISOTROPY_10_TO_1 0x28000000
+#define G80_TIC_6_MAX_ANISOTROPY_12_TO_1 0x30000000
+#define G80_TIC_6_MAX_ANISOTROPY_16_TO_1 0x38000000
+#define G80_TIC_6_ANISO_FINE_SPREAD_MODIFIER__MASK 0xc0000000
+#define G80_TIC_6_ANISO_FINE_SPREAD_MODIFIER__SHIFT 30
+#define G80_TIC_6_ANISO_FINE_SPREAD_MODIFIER_NONE 0x00000000
+#define G80_TIC_6_ANISO_FINE_SPREAD_MODIFIER_CONST_ONE 0x40000000
+#define G80_TIC_6_ANISO_FINE_SPREAD_MODIFIER_CONST_TWO 0x80000000
+#define G80_TIC_6_ANISO_FINE_SPREAD_MODIFIER_SQRT 0xc0000000
+
+#define G80_TIC_7 0x0000001c
+#define G80_TIC_7_COLOR_KEY_VALUE__MASK 0xffffffff
+#define G80_TIC_7_COLOR_KEY_VALUE__SHIFT 0
+
+#define G84_TIC_7 0x0000001c
+#define G84_TIC_7_RES_VIEW_MIN_MIP_LEVEL__MASK 0x0000000f
+#define G84_TIC_7_RES_VIEW_MIN_MIP_LEVEL__SHIFT 0
+#define G84_TIC_7_RES_VIEW_MAX_MIP_LEVEL__MASK 0x000000f0
+#define G84_TIC_7_RES_VIEW_MAX_MIP_LEVEL__SHIFT 4
+#define G84_TIC_7_HEIGHT_MSB 0x00000100
+#define G84_TIC_7_MULTI_SAMPLE_COUNT__MASK 0x0000f000
+#define G84_TIC_7_MULTI_SAMPLE_COUNT__SHIFT 12
+#define G84_TIC_7_MULTI_SAMPLE_COUNT_1X1 0x00000000
+#define G84_TIC_7_MULTI_SAMPLE_COUNT_2X1 0x00001000
+#define G84_TIC_7_MULTI_SAMPLE_COUNT_2X2 0x00002000
+#define G84_TIC_7_MULTI_SAMPLE_COUNT_4X2 0x00003000
+#define GT215_TIC_7_MULTI_SAMPLE_COUNT_4X2_D3D 0x00004000
+#define GT215_TIC_7_MULTI_SAMPLE_COUNT_2X1_D3D 0x00005000
+#define GF100_TIC_7_MULTI_SAMPLE_COUNT_4X4 0x00006000
+#define G84_TIC_7_MULTI_SAMPLE_COUNT_2X2_VC_4 0x00008000
+#define G84_TIC_7_MULTI_SAMPLE_COUNT_2X2_VC_12 0x00009000
+#define G84_TIC_7_MULTI_SAMPLE_COUNT_4X2_VC_8 0x0000a000
+#define GF100_TIC_7_MULTI_SAMPLE_COUNT_4X2_VC_24 0x0000b000
+#define G84_TIC_7_MIN_LOD_CLAMP__MASK 0x0fff0000
+#define G84_TIC_7_MIN_LOD_CLAMP__SHIFT 16
+#define G84_TIC_7_MIN_LOD_CLAMP__RADIX 0x00000008
+#define G84_TIC_7_DEPTH_MSB__MASK 0x70000000
+#define G84_TIC_7_DEPTH_MSB__SHIFT 28
+
+#define G80_TSC__SIZE 0x00000020
+#define G80_TSC_0 0x00000000
+#define G80_TSC_0_ADDRESS_U__MASK 0x00000007
+#define G80_TSC_0_ADDRESS_U__SHIFT 0
+#define G80_TSC_0_ADDRESS_V__MASK 0x00000038
+#define G80_TSC_0_ADDRESS_V__SHIFT 3
+#define G80_TSC_0_ADDRESS_P__MASK 0x000001c0
+#define G80_TSC_0_ADDRESS_P__SHIFT 6
+#define G80_TSC_0_DEPTH_COMPARE 0x00000200
+#define G80_TSC_0_DEPTH_COMPARE_FUNC__MASK 0x00001c00
+#define G80_TSC_0_DEPTH_COMPARE_FUNC__SHIFT 10
+#define G80_TSC_0_DEPTH_COMPARE_FUNC_NEVER 0x00000000
+#define G80_TSC_0_DEPTH_COMPARE_FUNC_LESS 0x00000400
+#define G80_TSC_0_DEPTH_COMPARE_FUNC_EQUAL 0x00000800
+#define G80_TSC_0_DEPTH_COMPARE_FUNC_LEQUAL 0x00000c00
+#define G80_TSC_0_DEPTH_COMPARE_FUNC_GREATER 0x00001000
+#define G80_TSC_0_DEPTH_COMPARE_FUNC_NOTEQUAL 0x00001400
+#define G80_TSC_0_DEPTH_COMPARE_FUNC_GEQUAL 0x00001800
+#define G80_TSC_0_DEPTH_COMPARE_FUNC_ALWAYS 0x00001c00
+#define G80_TSC_0_SRGB_CONVERSION 0x00002000
+#define G80_TSC_0_FONT_FILTER_WIDTH__MASK 0x0001c000
+#define G80_TSC_0_FONT_FILTER_WIDTH__SHIFT 14
+#define G80_TSC_0_FONT_FILTER_HEIGHT__MASK 0x000e0000
+#define G80_TSC_0_FONT_FILTER_HEIGHT__SHIFT 17
+#define G80_TSC_0_MAX_ANISOTROPY__MASK 0x00700000
+#define G80_TSC_0_MAX_ANISOTROPY__SHIFT 20
+#define G80_TSC_0_MAX_ANISOTROPY_1_TO_1 0x00000000
+#define G80_TSC_0_MAX_ANISOTROPY_2_TO_1 0x00100000
+#define G80_TSC_0_MAX_ANISOTROPY_4_TO_1 0x00200000
+#define G80_TSC_0_MAX_ANISOTROPY_6_TO_1 0x00300000
+#define G80_TSC_0_MAX_ANISOTROPY_8_TO_1 0x00400000
+#define G80_TSC_0_MAX_ANISOTROPY_10_TO_1 0x00500000
+#define G80_TSC_0_MAX_ANISOTROPY_12_TO_1 0x00600000
+#define G80_TSC_0_MAX_ANISOTROPY_16_TO_1 0x00700000
+
+#define G80_TSC_1 0x00000004
+#define G80_TSC_1_MAG_FILTER__MASK 0x00000003
+#define G80_TSC_1_MAG_FILTER__SHIFT 0
+#define G80_TSC_1_MAG_FILTER_NEAREST 0x00000001
+#define G80_TSC_1_MAG_FILTER_LINEAR 0x00000002
+#define G80_TSC_1_MIN_FILTER__MASK 0x00000030
+#define G80_TSC_1_MIN_FILTER__SHIFT 4
+#define G80_TSC_1_MIN_FILTER_NEAREST 0x00000010
+#define G80_TSC_1_MIN_FILTER_LINEAR 0x00000020
+#define G80_TSC_1_MIP_FILTER__MASK 0x000000c0
+#define G80_TSC_1_MIP_FILTER__SHIFT 6
+#define G80_TSC_1_MIP_FILTER_NONE 0x00000040
+#define G80_TSC_1_MIP_FILTER_NEAREST 0x00000080
+#define G80_TSC_1_MIP_FILTER_LINEAR 0x000000c0
+#define GK104_TSC_1_CUBEMAP_INTERFACE_FILTERING 0x00000200
+#define G80_TSC_1_MIP_LOD_BIAS__MASK 0x01fff000
+#define G80_TSC_1_MIP_LOD_BIAS__SHIFT 12
+#define G80_TSC_1_MIP_LOD_BIAS__RADIX 0x00000008
+#define GK104_TSC_1_FLOAT_COORD_NORMALIZATION__MASK 0x02000000
+#define GK104_TSC_1_FLOAT_COORD_NORMALIZATION__SHIFT 25
+#define GK104_TSC_1_FLOAT_COORD_NORMALIZATION_USE_HEADER_SETTING 0x00000000
+#define GK104_TSC_1_FLOAT_COORD_NORMALIZATION_FORCE_UNNORMALIZED_COORDS 0x02000000
+#define G80_TSC_1_TRILIN_OPT__MASK 0x7c000000
+#define G80_TSC_1_TRILIN_OPT__SHIFT 26
+
+#define G80_TSC_2 0x00000008
+#define G80_TSC_2_MIN_LOD_CLAMP__MASK 0x00000fff
+#define G80_TSC_2_MIN_LOD_CLAMP__SHIFT 0
+#define G80_TSC_2_MIN_LOD_CLAMP__RADIX 0x00000008
+#define G80_TSC_2_MAX_LOD_CLAMP__MASK 0x00fff000
+#define G80_TSC_2_MAX_LOD_CLAMP__SHIFT 12
+#define G80_TSC_2_MAX_LOD_CLAMP__RADIX 0x00000008
+#define G80_TSC_2_SRGB_BORDER_COLOR_R__MASK 0xff000000
+#define G80_TSC_2_SRGB_BORDER_COLOR_R__SHIFT 24
+
+#define G80_TSC_3 0x0000000c
+#define G80_TSC_3_SRGB_BORDER_COLOR_G__MASK 0x000ff000
+#define G80_TSC_3_SRGB_BORDER_COLOR_G__SHIFT 12
+#define G80_TSC_3_SRGB_BORDER_COLOR_B__MASK 0x0ff00000
+#define G80_TSC_3_SRGB_BORDER_COLOR_B__SHIFT 20
+
+#define G80_TSC_4 0x00000010
+#define G80_TSC_4_BORDER_COLOR_R__MASK 0xffffffff
+#define G80_TSC_4_BORDER_COLOR_R__SHIFT 0
+
+#define G80_TSC_5 0x00000014
+#define G80_TSC_5_BORDER_COLOR_G__MASK 0xffffffff
+#define G80_TSC_5_BORDER_COLOR_G__SHIFT 0
+
+#define G80_TSC_6 0x00000018
+#define G80_TSC_6_BORDER_COLOR_B__MASK 0xffffffff
+#define G80_TSC_6_BORDER_COLOR_B__SHIFT 0
+
+#define G80_TSC_7 0x0000001c
+#define G80_TSC_7_BORDER_COLOR_A__MASK 0xffffffff
+#define G80_TSC_7_BORDER_COLOR_A__SHIFT 0
+
+
+#endif /* G80_TEXTURE_XML */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.c b/src/gallium/drivers/nouveau/nv50/nv50_compute.c
index 6d23fd66945..04488d6d0a6 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_compute.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.c
@@ -270,13 +270,11 @@ nv50_compute_find_symbol(struct nv50_context *nv50, uint32_t label)
}
void
-nv50_launch_grid(struct pipe_context *pipe,
- const uint *block_layout, const uint *grid_layout,
- uint32_t label, const void *input)
+nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
{
struct nv50_context *nv50 = nv50_context(pipe);
struct nouveau_pushbuf *push = nv50->base.pushbuf;
- unsigned block_size = block_layout[0] * block_layout[1] * block_layout[2];
+ unsigned block_size = info->block[0] * info->block[1] * info->block[2];
struct nv50_program *cp = nv50->compprog;
bool ret;
@@ -286,10 +284,10 @@ nv50_launch_grid(struct pipe_context *pipe,
return;
}
- nv50_compute_upload_input(nv50, input);
+ nv50_compute_upload_input(nv50, info->input);
BEGIN_NV04(push, NV50_COMPUTE(CP_START_ID), 1);
- PUSH_DATA (push, nv50_compute_find_symbol(nv50, label));
+ PUSH_DATA (push, nv50_compute_find_symbol(nv50, info->pc));
BEGIN_NV04(push, NV50_COMPUTE(SHARED_SIZE), 1);
PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 0x10, 0x40));
@@ -298,14 +296,14 @@ nv50_launch_grid(struct pipe_context *pipe,
/* grid/block setup */
BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_XY), 2);
- PUSH_DATA (push, block_layout[1] << 16 | block_layout[0]);
- PUSH_DATA (push, block_layout[2]);
+ PUSH_DATA (push, info->block[1] << 16 | info->block[0]);
+ PUSH_DATA (push, info->block[2]);
BEGIN_NV04(push, NV50_COMPUTE(BLOCK_ALLOC), 1);
PUSH_DATA (push, 1 << 16 | block_size);
BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_LATCH), 1);
PUSH_DATA (push, 1);
BEGIN_NV04(push, NV50_COMPUTE(GRIDDIM), 1);
- PUSH_DATA (push, grid_layout[1] << 16 | grid_layout[0]);
+ PUSH_DATA (push, info->grid[1] << 16 | info->grid[0]);
BEGIN_NV04(push, NV50_COMPUTE(GRIDID), 1);
PUSH_DATA (push, 1);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h
index 342ec96d62c..2620d03b999 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -153,6 +153,7 @@ struct nv50_context {
uint32_t textures_coherent[3];
struct nv50_tsc_entry *samplers[3][PIPE_MAX_SAMPLERS];
unsigned num_samplers[3];
+ bool seamless_cube_map;
uint8_t num_so_targets;
uint8_t so_targets_dirty;
@@ -322,7 +323,6 @@ nv98_video_buffer_create(struct pipe_context *pipe,
/* nv50_compute.c */
void
-nv50_launch_grid(struct pipe_context *, const uint *, const uint *,
- uint32_t, const void *);
+nv50_launch_grid(struct pipe_context *, const struct pipe_grid_info *);
#endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_defs.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_defs.xml.h
deleted file mode 100644
index aad2a851691..00000000000
--- a/src/gallium/drivers/nouveau/nv50/nv50_defs.xml.h
+++ /dev/null
@@ -1,263 +0,0 @@
-#ifndef NV50_DEFS_XML
-#define NV50_DEFS_XML
-
-/* Autogenerated file, DO NOT EDIT manually!
-
-This file was generated by the rules-ng-ng headergen tool in this git repository:
-http://github.com/envytools/envytools/
-git clone https://github.com/envytools/envytools.git
-
-The rules-ng-ng source files this header was generated from are:
-- rnndb/g80_defs.xml ( 18175 bytes, from 2014-09-25 06:32:11)
-- rnndb/copyright.xml ( 6452 bytes, from 2013-05-14 03:57:49)
-- rnndb/nvchipsets.xml ( 2759 bytes, from 2014-10-05 01:51:02)
-
-Copyright (C) 2006-2014 by the following authors:
-- Artur Huillet <[email protected]> (ahuillet)
-- Ben Skeggs (darktama, darktama_)
-- B. R. <[email protected]> (koala_br)
-- Carlos Martin <[email protected]> (carlosmn)
-- Christoph Bumiller <[email protected]> (calim, chrisbmr)
-- Dawid Gajownik <[email protected]> (gajownik)
-- Dmitry Baryshkov
-- Dmitry Eremin-Solenikov <[email protected]> (lumag)
-- EdB <[email protected]> (edb_)
-- Erik Waling <[email protected]> (erikwaling)
-- Francisco Jerez <[email protected]> (curro)
-- imirkin <[email protected]> (imirkin)
-- jb17bsome <[email protected]> (jb17bsome)
-- Jeremy Kolb <[email protected]> (kjeremy)
-- Laurent Carlier <[email protected]> (lordheavy)
-- Luca Barbieri <[email protected]> (lb, lb1)
-- Maarten Maathuis <[email protected]> (stillunknown)
-- Marcin Kościelnicki <[email protected]> (mwk, koriakin)
-- Mark Carey <[email protected]> (careym)
-- Matthieu Castet <[email protected]> (mat-c)
-- nvidiaman <[email protected]> (nvidiaman)
-- Patrice Mandin <[email protected]> (pmandin, pmdata)
-- Pekka Paalanen <[email protected]> (pq, ppaalanen)
-- Peter Popov <[email protected]> (ironpeter)
-- Richard Hughes <[email protected]> (hughsient)
-- Rudi Cilibrasi <[email protected]> (cilibrar)
-- Serge Martin
-- Simon Raffeiner
-- Stephane Loeuillet <[email protected]> (leroutier)
-- Stephane Marchesin <[email protected]> (marcheu)
-- sturmflut <[email protected]> (sturmflut)
-- Sylvain Munaut <[email protected]>
-- Victor Stinner <[email protected]> (haypo)
-- Wladmir van der Laan <[email protected]> (miathan6)
-- Younes Manton <[email protected]> (ymanton)
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice (including the
-next paragraph) shall be included in all copies or substantial
-portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
-
-
-#define NV50_VSTATUS_IDLE 0x00000000
-#define NV50_VSTATUS_BUSY 0x00000001
-#define NV50_VSTATUS_UNK2 0x00000002
-#define NV50_VSTATUS_WAITING 0x00000003
-#define NV50_VSTATUS_BLOCKED 0x00000005
-#define NV50_VSTATUS_FAULTED 0x00000006
-#define NV50_VSTATUS_PAUSED 0x00000007
-#define NV50_SURFACE_FORMAT_BITMAP 0x0000001c
-#define NV50_SURFACE_FORMAT_UNK1D 0x0000001d
-#define NV50_SURFACE_FORMAT_RGBA32_FLOAT 0x000000c0
-#define NV50_SURFACE_FORMAT_RGBA32_SINT 0x000000c1
-#define NV50_SURFACE_FORMAT_RGBA32_UINT 0x000000c2
-#define NV50_SURFACE_FORMAT_RGBX32_FLOAT 0x000000c3
-#define NV50_SURFACE_FORMAT_RGBX32_SINT 0x000000c4
-#define NV50_SURFACE_FORMAT_RGBX32_UINT 0x000000c5
-#define NV50_SURFACE_FORMAT_RGBA16_UNORM 0x000000c6
-#define NV50_SURFACE_FORMAT_RGBA16_SNORM 0x000000c7
-#define NV50_SURFACE_FORMAT_RGBA16_SINT 0x000000c8
-#define NV50_SURFACE_FORMAT_RGBA16_UINT 0x000000c9
-#define NV50_SURFACE_FORMAT_RGBA16_FLOAT 0x000000ca
-#define NV50_SURFACE_FORMAT_RG32_FLOAT 0x000000cb
-#define NV50_SURFACE_FORMAT_RG32_SINT 0x000000cc
-#define NV50_SURFACE_FORMAT_RG32_UINT 0x000000cd
-#define NV50_SURFACE_FORMAT_RGBX16_FLOAT 0x000000ce
-#define NV50_SURFACE_FORMAT_BGRA8_UNORM 0x000000cf
-#define NV50_SURFACE_FORMAT_BGRA8_SRGB 0x000000d0
-#define NV50_SURFACE_FORMAT_RGB10_A2_UNORM 0x000000d1
-#define NV50_SURFACE_FORMAT_RGB10_A2_UINT 0x000000d2
-#define NV50_SURFACE_FORMAT_RGBA8_UNORM 0x000000d5
-#define NV50_SURFACE_FORMAT_RGBA8_SRGB 0x000000d6
-#define NV50_SURFACE_FORMAT_RGBA8_SNORM 0x000000d7
-#define NV50_SURFACE_FORMAT_RGBA8_SINT 0x000000d8
-#define NV50_SURFACE_FORMAT_RGBA8_UINT 0x000000d9
-#define NV50_SURFACE_FORMAT_RG16_UNORM 0x000000da
-#define NV50_SURFACE_FORMAT_RG16_SNORM 0x000000db
-#define NV50_SURFACE_FORMAT_RG16_SINT 0x000000dc
-#define NV50_SURFACE_FORMAT_RG16_UINT 0x000000dd
-#define NV50_SURFACE_FORMAT_RG16_FLOAT 0x000000de
-#define NV50_SURFACE_FORMAT_BGR10_A2_UNORM 0x000000df
-#define NV50_SURFACE_FORMAT_R11G11B10_FLOAT 0x000000e0
-#define NV50_SURFACE_FORMAT_R32_SINT 0x000000e3
-#define NV50_SURFACE_FORMAT_R32_UINT 0x000000e4
-#define NV50_SURFACE_FORMAT_R32_FLOAT 0x000000e5
-#define NV50_SURFACE_FORMAT_BGRX8_UNORM 0x000000e6
-#define NV50_SURFACE_FORMAT_BGRX8_SRGB 0x000000e7
-#define NV50_SURFACE_FORMAT_B5G6R5_UNORM 0x000000e8
-#define NV50_SURFACE_FORMAT_BGR5_A1_UNORM 0x000000e9
-#define NV50_SURFACE_FORMAT_RG8_UNORM 0x000000ea
-#define NV50_SURFACE_FORMAT_RG8_SNORM 0x000000eb
-#define NV50_SURFACE_FORMAT_RG8_SINT 0x000000ec
-#define NV50_SURFACE_FORMAT_RG8_UINT 0x000000ed
-#define NV50_SURFACE_FORMAT_R16_UNORM 0x000000ee
-#define NV50_SURFACE_FORMAT_R16_SNORM 0x000000ef
-#define NV50_SURFACE_FORMAT_R16_SINT 0x000000f0
-#define NV50_SURFACE_FORMAT_R16_UINT 0x000000f1
-#define NV50_SURFACE_FORMAT_R16_FLOAT 0x000000f2
-#define NV50_SURFACE_FORMAT_R8_UNORM 0x000000f3
-#define NV50_SURFACE_FORMAT_R8_SNORM 0x000000f4
-#define NV50_SURFACE_FORMAT_R8_SINT 0x000000f5
-#define NV50_SURFACE_FORMAT_R8_UINT 0x000000f6
-#define NV50_SURFACE_FORMAT_A8_UNORM 0x000000f7
-#define NV50_SURFACE_FORMAT_BGR5_X1_UNORM 0x000000f8
-#define NV50_SURFACE_FORMAT_RGBX8_UNORM 0x000000f9
-#define NV50_SURFACE_FORMAT_RGBX8_SRGB 0x000000fa
-#define NV50_SURFACE_FORMAT_BGR5_X1_UNORM_UNKFB 0x000000fb
-#define NV50_SURFACE_FORMAT_BGR5_X1_UNORM_UNKFC 0x000000fc
-#define NV50_SURFACE_FORMAT_BGRX8_UNORM_UNKFD 0x000000fd
-#define NV50_SURFACE_FORMAT_BGRX8_UNORM_UNKFE 0x000000fe
-#define NV50_SURFACE_FORMAT_Y32_UINT_UNKFF 0x000000ff
-#define NV50_ZETA_FORMAT_Z32_FLOAT 0x0000000a
-#define NV50_ZETA_FORMAT_Z16_UNORM 0x00000013
-#define NV50_ZETA_FORMAT_S8_Z24_UNORM 0x00000014
-#define NV50_ZETA_FORMAT_Z24_X8_UNORM 0x00000015
-#define NV50_ZETA_FORMAT_Z24_S8_UNORM 0x00000016
-#define NV50_ZETA_FORMAT_Z24_C8_UNORM 0x00000018
-#define NV50_ZETA_FORMAT_Z32_S8_X24_FLOAT 0x00000019
-#define NV50_ZETA_FORMAT_Z24_X8_S8_C8_X16_UNORM 0x0000001d
-#define NV50_ZETA_FORMAT_Z32_X8_C8_X16_FLOAT 0x0000001e
-#define NV50_ZETA_FORMAT_Z32_S8_C8_X16_FLOAT 0x0000001f
-#define NVE4_IMAGE_FORMAT_RGBA32_FLOAT 0x00000002
-#define NVE4_IMAGE_FORMAT_RGBA32_SINT 0x00000003
-#define NVE4_IMAGE_FORMAT_RGBA32_UINT 0x00000004
-#define NVE4_IMAGE_FORMAT_RGBA16_UNORM 0x00000008
-#define NVE4_IMAGE_FORMAT_RGBA16_SNORM 0x00000009
-#define NVE4_IMAGE_FORMAT_RGBA16_SINT 0x0000000a
-#define NVE4_IMAGE_FORMAT_RGBA16_UINT 0x0000000b
-#define NVE4_IMAGE_FORMAT_RGBA16_FLOAT 0x0000000c
-#define NVE4_IMAGE_FORMAT_RG32_FLOAT 0x0000000d
-#define NVE4_IMAGE_FORMAT_RG32_SINT 0x0000000e
-#define NVE4_IMAGE_FORMAT_RG32_UINT 0x0000000f
-#define NVE4_IMAGE_FORMAT_RGB10_A2_UNORM 0x00000013
-#define NVE4_IMAGE_FORMAT_RGB10_A2_UINT 0x00000015
-#define NVE4_IMAGE_FORMAT_RGBA8_UNORM 0x00000018
-#define NVE4_IMAGE_FORMAT_RGBA8_SNORM 0x0000001a
-#define NVE4_IMAGE_FORMAT_RGBA8_SINT 0x0000001b
-#define NVE4_IMAGE_FORMAT_RGBA8_UINT 0x0000001c
-#define NVE4_IMAGE_FORMAT_RG16_UNORM 0x0000001d
-#define NVE4_IMAGE_FORMAT_RG16_SNORM 0x0000001e
-#define NVE4_IMAGE_FORMAT_RG16_SINT 0x0000001f
-#define NVE4_IMAGE_FORMAT_RG16_UINT 0x00000020
-#define NVE4_IMAGE_FORMAT_RG16_FLOAT 0x00000021
-#define NVE4_IMAGE_FORMAT_R11G11B10_FLOAT 0x00000024
-#define NVE4_IMAGE_FORMAT_R32_SINT 0x00000027
-#define NVE4_IMAGE_FORMAT_R32_UINT 0x00000028
-#define NVE4_IMAGE_FORMAT_R32_FLOAT 0x00000029
-#define NVE4_IMAGE_FORMAT_RG8_UNORM 0x0000002e
-#define NVE4_IMAGE_FORMAT_RG8_SNORM 0x0000002f
-#define NVE4_IMAGE_FORMAT_RG8_SINT 0x00000030
-#define NVE4_IMAGE_FORMAT_RG8_UINT 0x00000031
-#define NVE4_IMAGE_FORMAT_R16_UNORM 0x00000032
-#define NVE4_IMAGE_FORMAT_R16_SNORM 0x00000033
-#define NVE4_IMAGE_FORMAT_R16_SINT 0x00000034
-#define NVE4_IMAGE_FORMAT_R16_UINT 0x00000035
-#define NVE4_IMAGE_FORMAT_R16_FLOAT 0x00000036
-#define NVE4_IMAGE_FORMAT_R8_UNORM 0x00000037
-#define NVE4_IMAGE_FORMAT_R8_SNORM 0x00000038
-#define NVE4_IMAGE_FORMAT_R8_SINT 0x00000039
-#define NVE4_IMAGE_FORMAT_R8_UINT 0x0000003a
-#define NV50_PGRAPH_DATA_ERROR_INVALID_OPERATION 0x00000003
-#define NV50_PGRAPH_DATA_ERROR_INVALID_VALUE 0x00000004
-#define NV50_PGRAPH_DATA_ERROR_INVALID_ENUM 0x00000005
-#define NV50_PGRAPH_DATA_ERROR_INVALID_OBJECT 0x00000008
-#define NV50_PGRAPH_DATA_ERROR_READ_ONLY_OBJECT 0x00000009
-#define NV50_PGRAPH_DATA_ERROR_SUPERVISOR_OBJECT 0x0000000a
-#define NV50_PGRAPH_DATA_ERROR_INVALID_ADDRESS_ALIGNMENT 0x0000000b
-#define NV50_PGRAPH_DATA_ERROR_INVALID_BITFIELD 0x0000000c
-#define NV50_PGRAPH_DATA_ERROR_BEGIN_END_ACTIVE 0x0000000d
-#define NV50_PGRAPH_DATA_ERROR_SEMANTIC_COLOR_BACK_OVER_LIMIT 0x0000000e
-#define NV50_PGRAPH_DATA_ERROR_VIEWPORT_ID_NEEDS_GP 0x0000000f
-#define NV50_PGRAPH_DATA_ERROR_RT_DOUBLE_BIND 0x00000010
-#define NV50_PGRAPH_DATA_ERROR_RT_TYPES_MISMATCH 0x00000011
-#define NV50_PGRAPH_DATA_ERROR_RT_LINEAR_WITH_ZETA 0x00000012
-#define NV50_PGRAPH_DATA_ERROR_FP_TOO_FEW_REGS 0x00000015
-#define NV50_PGRAPH_DATA_ERROR_ZETA_FORMAT_CSAA_MISMATCH 0x00000016
-#define NV50_PGRAPH_DATA_ERROR_RT_LINEAR_WITH_MSAA 0x00000017
-#define NV50_PGRAPH_DATA_ERROR_FP_INTERPOLANT_START_OVER_LIMIT 0x00000018
-#define NV50_PGRAPH_DATA_ERROR_SEMANTIC_LAYER_OVER_LIMIT 0x00000019
-#define NV50_PGRAPH_DATA_ERROR_RT_INVALID_ALIGNMENT 0x0000001a
-#define NV50_PGRAPH_DATA_ERROR_SAMPLER_OVER_LIMIT 0x0000001b
-#define NV50_PGRAPH_DATA_ERROR_TEXTURE_OVER_LIMIT 0x0000001c
-#define NV50_PGRAPH_DATA_ERROR_GP_TOO_MANY_OUTPUTS 0x0000001e
-#define NV50_PGRAPH_DATA_ERROR_RT_BPP128_WITH_MS8 0x0000001f
-#define NV50_PGRAPH_DATA_ERROR_Z_OUT_OF_BOUNDS 0x00000021
-#define NV50_PGRAPH_DATA_ERROR_XY_OUT_OF_BOUNDS 0x00000023
-#define NV50_PGRAPH_DATA_ERROR_VP_ZERO_INPUTS 0x00000024
-#define NV50_PGRAPH_DATA_ERROR_CP_MORE_PARAMS_THAN_SHARED 0x00000027
-#define NV50_PGRAPH_DATA_ERROR_CP_NO_REG_SPACE_STRIPED 0x00000028
-#define NV50_PGRAPH_DATA_ERROR_CP_NO_REG_SPACE_PACKED 0x00000029
-#define NV50_PGRAPH_DATA_ERROR_CP_NOT_ENOUGH_WARPS 0x0000002a
-#define NV50_PGRAPH_DATA_ERROR_CP_BLOCK_SIZE_MISMATCH 0x0000002b
-#define NV50_PGRAPH_DATA_ERROR_CP_NOT_ENOUGH_LOCAL_WARPS 0x0000002c
-#define NV50_PGRAPH_DATA_ERROR_CP_NOT_ENOUGH_STACK_WARPS 0x0000002d
-#define NV50_PGRAPH_DATA_ERROR_CP_NO_BLOCKDIM_LATCH 0x0000002e
-#define NV50_PGRAPH_DATA_ERROR_ENG2D_FORMAT_MISMATCH 0x00000031
-#define NV50_PGRAPH_DATA_ERROR_ENG2D_OPERATION_ILLEGAL_FOR_DST_FORMAT 0x00000033
-#define NV50_PGRAPH_DATA_ERROR_ENG2D_FORMAT_MISMATCH_B 0x00000034
-#define NV50_PGRAPH_DATA_ERROR_PRIMITIVE_ID_NEEDS_GP 0x0000003f
-#define NV50_PGRAPH_DATA_ERROR_SEMANTIC_VIEWPORT_OVER_LIMIT 0x00000044
-#define NV50_PGRAPH_DATA_ERROR_SEMANTIC_COLOR_FRONT_OVER_LIMIT 0x00000045
-#define NV50_PGRAPH_DATA_ERROR_LAYER_ID_NEEDS_GP 0x00000046
-#define NV50_PGRAPH_DATA_ERROR_SEMANTIC_CLIP_OVER_LIMIT 0x00000047
-#define NV50_PGRAPH_DATA_ERROR_SEMANTIC_PTSZ_OVER_LIMIT 0x00000048
-#define NV50_PGRAPH_DATA_ERROR_M2MF_LINE_LENGTH_EXCEEDS_PITCH_IN 0x00000051
-#define NV50_PGRAPH_DATA_ERROR_M2MF_LINE_LENGTH_EXCEEDS_PITCH_OUT 0x00000053
-#define NV50_PGRAPH_DATA_ERROR_RT_LINEAR_WITH_ZETA_GF100 0x00000098
-#define NV50_PGRAPH_DATA_ERROR_ENG2D_UNALIGNED_PITCH_GF100 0x000000a5
-#define NV50_CG_IDLE_TIMEOUT__MASK 0x0000003f
-#define NV50_CG_IDLE_TIMEOUT__SHIFT 0
-#define NV50_CG_IDLE_TIMEOUT_ENABLE 0x00000040
-#define NV50_CG_INTERFACE_REENABLE_TIME__MASK 0x000f0000
-#define NV50_CG_INTERFACE_REENABLE_TIME__SHIFT 16
-#define NV50_CG_THROTTLE_DUTY_M1__MASK 0x00f00000
-#define NV50_CG_THROTTLE_DUTY_M1__SHIFT 20
-#define NV50_CG_DELAY__MASK 0x0f000000
-#define NV50_CG_DELAY__SHIFT 24
-#define NV50_CG_CLOCK_THROTTLE_ENABLE 0x10000000
-#define NV50_CG_THROTTLE_MODE__MASK 0x20000000
-#define NV50_CG_THROTTLE_MODE__SHIFT 29
-#define NV50_CG_THROTTLE_MODE_AUTO 0x00000000
-#define NV50_CG_THROTTLE_MODE_MANUAL 0x20000000
-#define NV50_CG_INTERFACE_THROTTLE_ENABLE 0x40000000
-#define NV50_QUERY__SIZE 0x00000010
-#define NV50_QUERY_COUNTER 0x00000000
-
-#define NV50_QUERY_RES 0x00000004
-
-#define NV50_QUERY_TIME 0x00000008
-
-
-#endif /* NV50_DEFS_XML */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_formats.c b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
index 49a93bf1d91..717067cf2f7 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_formats.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
@@ -27,8 +27,8 @@
# include "nv50/nv50_screen.h"
# include "nv50/nv50_3d.xml.h"
#endif
-#include "nv50/nv50_texture.xml.h"
-#include "nv50/nv50_defs.xml.h"
+#include "nv50/g80_texture.xml.h"
+#include "nv50/g80_defs.xml.h"
#include "pipe/p_defines.h"
@@ -39,10 +39,8 @@
* C: render target (color), blendable only on nvc0
* D: scanout/display target, blendable
* Z: depth/stencil
- * V: vertex fetch
* I: image / surface, implies T
*/
-#define U_V PIPE_BIND_VERTEX_BUFFER
#define U_T PIPE_BIND_SAMPLER_VIEW
#define U_I PIPE_BIND_SHADER_BUFFER | PIPE_BIND_SHADER_IMAGE | PIPE_BIND_COMPUTE_RESOURCE
#define U_TR PIPE_BIND_RENDER_TARGET | U_T
@@ -51,38 +49,273 @@
#define U_IB PIPE_BIND_BLENDABLE | U_IR
#define U_TD PIPE_BIND_SCANOUT | PIPE_BIND_DISPLAY_TARGET | U_TB
#define U_TZ PIPE_BIND_DEPTH_STENCIL | U_T
-#define U_TV U_V | U_T
-#define U_TRV U_V | U_TR
-#define U_IRV U_V | U_IR
-#define U_TBV U_V | U_TB
-#define U_IBV U_V | U_IB
-#define U_TDV U_V | U_TD
#if NOUVEAU_DRIVER == 0xc0
# define U_TC U_TB
# define U_IC U_IB
-# define U_TCV U_TBV
-# define U_ICV U_IBV
# define U_t U_T
-# define U_tV U_TV
#else
# define U_TC U_TR
# define U_IC U_IR
-# define U_TCV U_TRV
-# define U_ICV U_IRV
# define U_t 0
-# define U_tV U_V
#endif
-#define NV50_ZETA_FORMAT_NONE 0
-#define NV50_SURFACE_FORMAT_NONE 0
+#define G80_ZETA_FORMAT_NONE 0
+#define G80_SURFACE_FORMAT_NONE 0
-/* for vertex buffers: */
-#define NV50_TIC_0_FMT_8_8_8 NV50_TIC_0_FMT_8_8_8_8
-#define NV50_TIC_0_FMT_16_16_16 NV50_TIC_0_FMT_16_16_16_16
-#define NV50_TIC_0_FMT_32_32_32 NVC0_TIC_0_FMT_32_32_32
-#define NV50_TIC_0_FMT_BPTC NVC0_TIC_0_FMT_BPTC
-#define NV50_TIC_0_FMT_BPTC_FLOAT NVC0_TIC_0_FMT_BPTC_FLOAT
-#define NV50_TIC_0_FMT_BPTC_UFLOAT NVC0_TIC_0_FMT_BPTC_UFLOAT
+#define SF_A(sz) G80_TIC_0_COMPONENTS_SIZES_##sz
+#define SF_B(sz) G200_TIC_0_COMPONENTS_SIZES_##sz
+#define SF_C(sz) GF100_TIC_0_COMPONENTS_SIZES_##sz
+#define SF(c, pf, sf, r, g, b, a, t0, t1, t2, t3, sz, u) \
+ [PIPE_FORMAT_##pf] = { \
+ sf, { \
+ SF_##c(sz), \
+ G80_TIC_TYPE_##t0, \
+ G80_TIC_TYPE_##t1, \
+ G80_TIC_TYPE_##t2, \
+ G80_TIC_TYPE_##t3, \
+ G80_TIC_SOURCE_##r, \
+ G80_TIC_SOURCE_##g, \
+ G80_TIC_SOURCE_##b, \
+ G80_TIC_SOURCE_##a, \
+ }, U_##u \
+ }
+
+#define C4(c, p, n, r, g, b, a, t, s, u) \
+ SF(c, p, G80_SURFACE_FORMAT_##n, r, g, b, a, t, t, t, t, s, u)
+
+#define ZX(c, p, n, r, g, b, a, t, s, u) \
+ SF(c, p, G80_ZETA_FORMAT_##n, \
+ r, g, b, ONE_FLOAT, t, UINT, UINT, UINT, s, u)
+#define ZS(c, p, n, r, g, b, a, t, s, u) \
+ SF(c, p, G80_ZETA_FORMAT_##n, \
+ r, g, b, ONE_FLOAT, t, UINT, UINT, UINT, s, u)
+#define SZ(c, p, n, r, g, b, a, t, s, u) \
+ SF(c, p, G80_ZETA_FORMAT_##n, \
+ r, g, b, ONE_FLOAT, UINT, t, UINT, UINT, s, u)
+#define SX(c, p, r, s, u) \
+ SF(c, p, G80_ZETA_FORMAT_NONE, \
+ r, r, r, r, UINT, UINT, UINT, UINT, s, u)
+
+#define F3(c, p, n, r, g, b, a, t, s, u) \
+ C4(c, p, n, r, g, b, ONE_FLOAT, t, s, u)
+#define I3(c, p, n, r, g, b, a, t, s, u) \
+ C4(c, p, n, r, g, b, ONE_INT, t, s, u)
+
+#define F2(c, p, n, r, g, b, a, t, s, u) \
+ C4(c, p, n, r, g, ZERO, ONE_FLOAT, t, s, u)
+#define I2(c, p, n, r, g, b, a, t, s, u) \
+ C4(c, p, n, r, g, ZERO, ONE_INT, t, s, u)
+
+#define F1(c, p, n, r, g, b, a, t, s, u) \
+ C4(c, p, n, r, ZERO, ZERO, ONE_FLOAT, t, s, u)
+#define I1(c, p, n, r, g, b, a, t, s, u) \
+ C4(c, p, n, r, ZERO, ZERO, ONE_INT, t, s, u)
+
+#define A1(c, p, n, r, g, b, a, t, s, u) \
+ C4(c, p, n, ZERO, ZERO, ZERO, a, t, s, u)
+
+#if NOUVEAU_DRIVER == 0xc0
+const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] =
+#else
+const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
+#endif
+{
+ C4(A, B8G8R8A8_UNORM, BGRA8_UNORM, B, G, R, A, UNORM, A8B8G8R8, TD),
+ F3(A, B8G8R8X8_UNORM, BGRX8_UNORM, B, G, R, xx, UNORM, A8B8G8R8, TD),
+ C4(A, B8G8R8A8_SRGB, BGRA8_SRGB, B, G, R, A, UNORM, A8B8G8R8, TD),
+ F3(A, B8G8R8X8_SRGB, BGRX8_SRGB, B, G, R, xx, UNORM, A8B8G8R8, TD),
+ C4(A, R8G8B8A8_UNORM, RGBA8_UNORM, R, G, B, A, UNORM, A8B8G8R8, IB),
+ F3(A, R8G8B8X8_UNORM, RGBX8_UNORM, R, G, B, xx, UNORM, A8B8G8R8, TB),
+ C4(A, R8G8B8A8_SRGB, RGBA8_SRGB, R, G, B, A, UNORM, A8B8G8R8, TB),
+ F3(A, R8G8B8X8_SRGB, RGBX8_SRGB, R, G, B, xx, UNORM, A8B8G8R8, TB),
+
+ ZX(B, Z16_UNORM, Z16_UNORM, R, R, R, xx, UNORM, Z16, TZ),
+ ZX(A, Z32_FLOAT, Z32_FLOAT, R, R, R, xx, FLOAT, ZF32, TZ),
+ ZX(A, Z24X8_UNORM, Z24_X8_UNORM, R, R, R, xx, UNORM, X8Z24, TZ),
+ SZ(A, X8Z24_UNORM, S8_Z24_UNORM, G, G, G, xx, UNORM, Z24S8, TZ),
+ ZS(A, Z24_UNORM_S8_UINT, Z24_S8_UNORM, R, R, R, xx, UNORM, S8Z24, TZ),
+ SZ(A, S8_UINT_Z24_UNORM, S8_Z24_UNORM, G, G, G, xx, UNORM, Z24S8, TZ),
+ ZS(A, Z32_FLOAT_S8X24_UINT, Z32_S8_X24_FLOAT, R, R, R, xx, FLOAT, ZF32_X24S8, TZ),
+
+ SX(A, S8_UINT, R, R8, T),
+ SX(A, X24S8_UINT, G, S8Z24, T),
+ SX(A, S8X24_UINT, R, Z24S8, T),
+ SX(A, X32_S8X24_UINT, G, ZF32_X24S8, T),
+
+ F3(A, B5G6R5_UNORM, B5G6R5_UNORM, B, G, R, xx, UNORM, B5G6R5, TD),
+ C4(A, B5G5R5A1_UNORM, BGR5_A1_UNORM, B, G, R, A, UNORM, A1B5G5R5, TD),
+ F3(A, B5G5R5X1_UNORM, BGR5_X1_UNORM, B, G, R, xx, UNORM, A1B5G5R5, TD),
+ C4(A, B4G4R4A4_UNORM, NONE, B, G, R, A, UNORM, A4B4G4R4, T),
+ F3(A, B4G4R4X4_UNORM, NONE, B, G, R, xx, UNORM, A4B4G4R4, T),
+ F3(A, R9G9B9E5_FLOAT, NONE, R, G, B, xx, FLOAT, E5B9G9R9_SHAREDEXP, T),
+
+ C4(A, R10G10B10A2_UNORM, RGB10_A2_UNORM, R, G, B, A, UNORM, A2B10G10R10, IB),
+ C4(A, B10G10R10A2_UNORM, BGR10_A2_UNORM, B, G, R, A, UNORM, A2B10G10R10, TD),
+ C4(A, R10G10B10A2_SNORM, NONE, R, G, B, A, SNORM, A2B10G10R10, T),
+ C4(A, B10G10R10A2_SNORM, NONE, B, G, R, A, SNORM, A2B10G10R10, T),
+ C4(A, R10G10B10A2_UINT, RGB10_A2_UINT, R, G, B, A, UINT, A2B10G10R10, TR),
+ C4(A, B10G10R10A2_UINT, RGB10_A2_UINT, B, G, R, A, UINT, A2B10G10R10, T),
+
+ F3(A, R11G11B10_FLOAT, R11G11B10_FLOAT, R, G, B, xx, FLOAT, BF10GF11RF11, IB),
+
+ F3(A, L8_UNORM, R8_UNORM, R, R, R, xx, UNORM, R8, TB),
+ F3(A, L8_SRGB, R8_UNORM, R, R, R, xx, UNORM, R8, TB),
+ F3(A, L8_SNORM, R8_SNORM, R, R, R, xx, SNORM, R8, TC),
+ I3(A, L8_SINT, R8_SINT, R, R, R, xx, SINT, R8, TR),
+ I3(A, L8_UINT, R8_UINT, R, R, R, xx, UINT, R8, TR),
+ F3(A, L16_UNORM, R16_UNORM, R, R, R, xx, UNORM, R16, TC),
+ F3(A, L16_SNORM, R16_SNORM, R, R, R, xx, SNORM, R16, TC),
+ F3(A, L16_FLOAT, R16_FLOAT, R, R, R, xx, FLOAT, R16, TB),
+ I3(A, L16_SINT, R16_SINT, R, R, R, xx, SINT, R16, TR),
+ I3(A, L16_UINT, R16_UINT, R, R, R, xx, UINT, R16, TR),
+ F3(A, L32_FLOAT, R32_FLOAT, R, R, R, xx, FLOAT, R32, TB),
+ I3(A, L32_SINT, R32_SINT, R, R, R, xx, SINT, R32, TR),
+ I3(A, L32_UINT, R32_UINT, R, R, R, xx, UINT, R32, TR),
+
+ C4(A, I8_UNORM, R8_UNORM, R, R, R, R, UNORM, R8, TR),
+ C4(A, I8_SNORM, R8_SNORM, R, R, R, R, SNORM, R8, TR),
+ C4(A, I8_SINT, R8_SINT, R, R, R, R, SINT, R8, TR),
+ C4(A, I8_UINT, R8_UINT, R, R, R, R, UINT, R8, TR),
+ C4(A, I16_UNORM, R16_UNORM, R, R, R, R, UNORM, R16, TR),
+ C4(A, I16_SNORM, R16_SNORM, R, R, R, R, SNORM, R16, TR),
+ C4(A, I16_FLOAT, R16_FLOAT, R, R, R, R, FLOAT, R16, TR),
+ C4(A, I16_SINT, R16_SINT, R, R, R, R, SINT, R16, TR),
+ C4(A, I16_UINT, R16_UINT, R, R, R, R, UINT, R16, TR),
+ C4(A, I32_FLOAT, R32_FLOAT, R, R, R, R, FLOAT, R32, TR),
+ C4(A, I32_SINT, R32_SINT, R, R, R, R, SINT, R32, TR),
+ C4(A, I32_UINT, R32_UINT, R, R, R, R, UINT, R32, TR),
+
+ A1(A, A8_UNORM, A8_UNORM, xx, xx, xx, R, UNORM, R8, TB),
+ A1(A, A8_SNORM, R8_SNORM, xx, xx, xx, R, SNORM, R8, T),
+ A1(A, A8_SINT, R8_SINT, xx, xx, xx, R, SINT, R8, T),
+ A1(A, A8_UINT, R8_UINT, xx, xx, xx, R, UINT, R8, T),
+ A1(A, A16_UNORM, R16_UNORM, xx, xx, xx, R, UNORM, R16, T),
+ A1(A, A16_SNORM, R16_SNORM, xx, xx, xx, R, SNORM, R16, T),
+ A1(A, A16_FLOAT, R16_FLOAT, xx, xx, xx, R, FLOAT, R16, T),
+ A1(A, A16_SINT, R16_SINT, xx, xx, xx, R, SINT, R16, T),
+ A1(A, A16_UINT, R16_UINT, xx, xx, xx, R, UINT, R16, T),
+ A1(A, A32_FLOAT, R32_FLOAT, xx, xx, xx, R, FLOAT, R32, T),
+ A1(A, A32_SINT, R32_SINT, xx, xx, xx, R, SINT, R32, T),
+ A1(A, A32_UINT, R32_UINT, xx, xx, xx, R, UINT, R32, T),
+
+ C4(A, L4A4_UNORM, NONE, R, R, R, G, UNORM, G4R4, T),
+ C4(A, L8A8_UNORM, RG8_UNORM, R, R, R, G, UNORM, G8R8, T),
+ C4(A, L8A8_SNORM, RG8_SNORM, R, R, R, G, SNORM, G8R8, T),
+ C4(A, L8A8_SRGB, RG8_UNORM, R, R, R, G, UNORM, G8R8, T),
+ C4(A, L8A8_SINT, RG8_SINT, R, R, R, G, SINT, G8R8, T),
+ C4(A, L8A8_UINT, RG8_UINT, R, R, R, G, UINT, G8R8, T),
+ C4(A, L16A16_UNORM, RG16_UNORM, R, R, R, G, UNORM, R16_G16, T),
+ C4(A, L16A16_SNORM, RG16_SNORM, R, R, R, G, SNORM, R16_G16, T),
+ C4(A, L16A16_FLOAT, RG16_FLOAT, R, R, R, G, FLOAT, R16_G16, T),
+ C4(A, L16A16_SINT, RG16_SINT, R, R, R, G, SINT, R16_G16, T),
+ C4(A, L16A16_UINT, RG16_UINT, R, R, R, G, UINT, R16_G16, T),
+ C4(A, L32A32_FLOAT, RG32_FLOAT, R, R, R, G, FLOAT, R32_G32, T),
+ C4(A, L32A32_SINT, RG32_SINT, R, R, R, G, SINT, R32_G32, T),
+ C4(A, L32A32_UINT, RG32_UINT, R, R, R, G, UINT, R32_G32, T),
+
+ F3(A, DXT1_RGB, NONE, R, G, B, xx, UNORM, DXT1, T),
+ F3(A, DXT1_SRGB, NONE, R, G, B, xx, UNORM, DXT1, T),
+ C4(A, DXT1_RGBA, NONE, R, G, B, A, UNORM, DXT1, T),
+ C4(A, DXT1_SRGBA, NONE, R, G, B, A, UNORM, DXT1, T),
+ C4(A, DXT3_RGBA, NONE, R, G, B, A, UNORM, DXT23, T),
+ C4(A, DXT3_SRGBA, NONE, R, G, B, A, UNORM, DXT23, T),
+ C4(A, DXT5_RGBA, NONE, R, G, B, A, UNORM, DXT45, T),
+ C4(A, DXT5_SRGBA, NONE, R, G, B, A, UNORM, DXT45, T),
+
+ F1(A, RGTC1_UNORM, NONE, R, xx, xx, xx, UNORM, DXN1, T),
+ F1(A, RGTC1_SNORM, NONE, R, xx, xx, xx, SNORM, DXN1, T),
+ F2(A, RGTC2_UNORM, NONE, R, G, xx, xx, UNORM, DXN2, T),
+ F2(A, RGTC2_SNORM, NONE, R, G, xx, xx, SNORM, DXN2, T),
+ F3(A, LATC1_UNORM, NONE, R, R, R, xx, UNORM, DXN1, T),
+ F3(A, LATC1_SNORM, NONE, R, R, R, xx, SNORM, DXN1, T),
+ C4(A, LATC2_UNORM, NONE, R, R, R, G, UNORM, DXN2, T),
+ C4(A, LATC2_SNORM, NONE, R, R, R, G, SNORM, DXN2, T),
+
+ C4(C, BPTC_RGBA_UNORM, NONE, R, G, B, A, UNORM, BC7U, t),
+ C4(C, BPTC_SRGBA, NONE, R, G, B, A, UNORM, BC7U, t),
+ F3(C, BPTC_RGB_FLOAT, NONE, R, G, B, xx, FLOAT, BC6H_SF16, t),
+ F3(C, BPTC_RGB_UFLOAT, NONE, R, G, B, xx, FLOAT, BC6H_UF16, t),
+
+ C4(A, R32G32B32A32_FLOAT, RGBA32_FLOAT, R, G, B, A, FLOAT, R32_G32_B32_A32, IB),
+ C4(A, R32G32B32A32_UNORM, NONE, R, G, B, A, UNORM, R32_G32_B32_A32, T),
+ C4(A, R32G32B32A32_SNORM, NONE, R, G, B, A, SNORM, R32_G32_B32_A32, T),
+ C4(A, R32G32B32A32_SINT, RGBA32_SINT, R, G, B, A, SINT, R32_G32_B32_A32, IR),
+ C4(A, R32G32B32A32_UINT, RGBA32_UINT, R, G, B, A, UINT, R32_G32_B32_A32, IR),
+ F3(A, R32G32B32X32_FLOAT, RGBX32_FLOAT, R, G, B, xx, FLOAT, R32_G32_B32_A32, TB),
+ I3(A, R32G32B32X32_SINT, RGBX32_SINT, R, G, B, xx, SINT, R32_G32_B32_A32, TR),
+ I3(A, R32G32B32X32_UINT, RGBX32_UINT, R, G, B, xx, UINT, R32_G32_B32_A32, TR),
+
+ F3(C, R32G32B32_FLOAT, NONE, R, G, B, xx, FLOAT, R32_G32_B32, t),
+ I3(C, R32G32B32_SINT, NONE, R, G, B, xx, SINT, R32_G32_B32, t),
+ I3(C, R32G32B32_UINT, NONE, R, G, B, xx, UINT, R32_G32_B32, t),
+
+ F2(A, R32G32_FLOAT, RG32_FLOAT, R, G, xx, xx, FLOAT, R32_G32, IB),
+ F2(A, R32G32_UNORM, NONE, R, G, xx, xx, UNORM, R32_G32, T),
+ F2(A, R32G32_SNORM, NONE, R, G, xx, xx, SNORM, R32_G32, T),
+ I2(A, R32G32_SINT, RG32_SINT, R, G, xx, xx, SINT, R32_G32, IR),
+ I2(A, R32G32_UINT, RG32_UINT, R, G, xx, xx, UINT, R32_G32, IR),
+
+ F1(A, R32_FLOAT, R32_FLOAT, R, xx, xx, xx, FLOAT, R32, IB),
+ F1(A, R32_UNORM, NONE, R, xx, xx, xx, UNORM, R32, T),
+ F1(A, R32_SNORM, NONE, R, xx, xx, xx, SNORM, R32, T),
+ I1(A, R32_SINT, R32_SINT, R, xx, xx, xx, SINT, R32, IR),
+ I1(A, R32_UINT, R32_UINT, R, xx, xx, xx, UINT, R32, IR),
+
+ C4(A, R16G16B16A16_FLOAT, RGBA16_FLOAT, R, G, B, A, FLOAT, R16_G16_B16_A16, IB),
+ C4(A, R16G16B16A16_UNORM, RGBA16_UNORM, R, G, B, A, UNORM, R16_G16_B16_A16, IC),
+ C4(A, R16G16B16A16_SNORM, RGBA16_SNORM, R, G, B, A, SNORM, R16_G16_B16_A16, IC),
+ C4(A, R16G16B16A16_SINT, RGBA16_SINT, R, G, B, A, SINT, R16_G16_B16_A16, IR),
+ C4(A, R16G16B16A16_UINT, RGBA16_UINT, R, G, B, A, UINT, R16_G16_B16_A16, IR),
+ F3(A, R16G16B16X16_FLOAT, RGBX16_FLOAT, R, G, B, xx, FLOAT, R16_G16_B16_A16, TB),
+ F3(A, R16G16B16X16_UNORM, RGBA16_UNORM, R, G, B, xx, UNORM, R16_G16_B16_A16, T),
+ F3(A, R16G16B16X16_SNORM, RGBA16_SNORM, R, G, B, xx, SNORM, R16_G16_B16_A16, T),
+ I3(A, R16G16B16X16_SINT, RGBA16_SINT, R, G, B, xx, SINT, R16_G16_B16_A16, T),
+ I3(A, R16G16B16X16_UINT, RGBA16_UINT, R, G, B, xx, UINT, R16_G16_B16_A16, T),
+
+ F2(A, R16G16_FLOAT, RG16_FLOAT, R, G, xx, xx, FLOAT, R16_G16, IB),
+ F2(A, R16G16_UNORM, RG16_UNORM, R, G, xx, xx, UNORM, R16_G16, IC),
+ F2(A, R16G16_SNORM, RG16_SNORM, R, G, xx, xx, SNORM, R16_G16, IC),
+ I2(A, R16G16_SINT, RG16_SINT, R, G, xx, xx, SINT, R16_G16, IR),
+ I2(A, R16G16_UINT, RG16_UINT, R, G, xx, xx, UINT, R16_G16, IR),
+
+ F1(A, R16_FLOAT, R16_FLOAT, R, xx, xx, xx, FLOAT, R16, IB),
+ F1(A, R16_UNORM, R16_UNORM, R, xx, xx, xx, UNORM, R16, IC),
+ F1(A, R16_SNORM, R16_SNORM, R, xx, xx, xx, SNORM, R16, IC),
+ I1(A, R16_SINT, R16_SINT, R, xx, xx, xx, SINT, R16, IR),
+ I1(A, R16_UINT, R16_UINT, R, xx, xx, xx, UINT, R16, IR),
+
+ C4(A, R8G8B8A8_SNORM, RGBA8_SNORM, R, G, B, A, SNORM, A8B8G8R8, IC),
+ C4(A, R8G8B8A8_SINT, RGBA8_SINT, R, G, B, A, SINT, A8B8G8R8, IR),
+ C4(A, R8G8B8A8_UINT, RGBA8_UINT, R, G, B, A, UINT, A8B8G8R8, IR),
+ F3(A, R8G8B8X8_SNORM, RGBA8_SNORM, R, G, B, xx, SNORM, A8B8G8R8, T),
+ I3(A, R8G8B8X8_SINT, RGBA8_SINT, R, G, B, xx, SINT, A8B8G8R8, T),
+ I3(A, R8G8B8X8_UINT, RGBA8_UINT, R, G, B, xx, UINT, A8B8G8R8, T),
+
+ F2(A, R8G8_UNORM, RG8_UNORM, R, G, xx, xx, UNORM, G8R8, IB),
+ F2(A, R8G8_SNORM, RG8_SNORM, R, G, xx, xx, SNORM, G8R8, IC),
+ I2(A, R8G8_SINT, RG8_SINT, R, G, xx, xx, SINT, G8R8, IR),
+ I2(A, R8G8_UINT, RG8_UINT, R, G, xx, xx, UINT, G8R8, IR),
+
+ F1(A, R8_UNORM, R8_UNORM, R, xx, xx, xx, UNORM, R8, IB),
+ F1(A, R8_SNORM, R8_SNORM, R, xx, xx, xx, SNORM, R8, IC),
+ I1(A, R8_SINT, R8_SINT, R, xx, xx, xx, SINT, R8, IR),
+ I1(A, R8_UINT, R8_UINT, R, xx, xx, xx, UINT, R8, IR),
+
+ F3(A, R8G8_B8G8_UNORM, NONE, R, G, B, xx, UNORM, G8B8G8R8, T),
+ F3(A, G8R8_B8R8_UNORM, NONE, G, R, B, xx, UNORM, G8B8G8R8, T),
+ F3(A, G8R8_G8B8_UNORM, NONE, R, G, B, xx, UNORM, B8G8R8G8, T),
+ F3(A, R8G8_R8B8_UNORM, NONE, G, R, B, xx, UNORM, B8G8R8G8, T),
+
+ F1(A, R1_UNORM, BITMAP, R, xx, xx, xx, UNORM, R1, T),
+
+ C4(A, R4A4_UNORM, NONE, R, ZERO, ZERO, G, UNORM, G4R4, T),
+ C4(A, R8A8_UNORM, NONE, R, ZERO, ZERO, G, UNORM, G8R8, T),
+ C4(A, A4R4_UNORM, NONE, G, ZERO, ZERO, R, UNORM, G4R4, T),
+ C4(A, A8R8_UNORM, NONE, G, ZERO, ZERO, R, UNORM, G8R8, T),
+
+ SF(A, R8SG8SB8UX8U_NORM, 0, R, G, B, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, A8B8G8R8, T),
+ SF(A, R5SG5SB6U_NORM, 0, R, G, B, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, B6G5R5, T),
+};
#if NOUVEAU_DRIVER == 0xc0
# define NVXX_3D_VAF_SIZE(s) NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_##s
@@ -92,353 +325,138 @@
# define NVXX_3D_VAF_TYPE(t) NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_##t
#endif
-#define TBLENT_A_(pf, sf, r, g, b, a, t0, t1, t2, t3, sz, u, br) \
- [PIPE_FORMAT_##pf] = { \
- sf, \
- (NV50_TIC_MAP_##r << NV50_TIC_0_MAPR__SHIFT) | \
- (NV50_TIC_MAP_##g << NV50_TIC_0_MAPG__SHIFT) | \
- (NV50_TIC_MAP_##b << NV50_TIC_0_MAPB__SHIFT) | \
- (NV50_TIC_MAP_##a << NV50_TIC_0_MAPA__SHIFT) | \
- (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \
- (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \
- (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \
- (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \
- NV50_TIC_0_FMT_##sz, \
- NVXX_3D_VAF_SIZE(sz) | \
- NVXX_3D_VAF_TYPE(t0) | (br << 31), \
- U_##u \
- }
-
-#define TBLENT_B_(pf, sf, r, g, b, a, t0, t1, t2, t3, sz, u) \
+#define VF_x(pf, type, size, bgra) 0
+#define VF_A(pf, type, size, bgra) \
+ NVXX_3D_VAF_SIZE(size) | NVXX_3D_VAF_TYPE(type) | (bgra << 31)
+#define VF(c, pf, type, size, bgra) \
[PIPE_FORMAT_##pf] = { \
- sf, \
- (NV50_TIC_MAP_##r << NV50_TIC_0_MAPR__SHIFT) | \
- (NV50_TIC_MAP_##g << NV50_TIC_0_MAPG__SHIFT) | \
- (NV50_TIC_MAP_##b << NV50_TIC_0_MAPB__SHIFT) | \
- (NV50_TIC_MAP_##a << NV50_TIC_0_MAPA__SHIFT) | \
- (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \
- (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \
- (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \
- (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \
- NV50_TIC_0_FMT_##sz, 0, U_##u \
+ VF_##c(pf, type, size, bgra), \
+ PIPE_BIND_VERTEX_BUFFER \
}
-#define C4A(p, n, r, g, b, a, t, s, u, br) \
- TBLENT_A_(p, NV50_SURFACE_FORMAT_##n, r, g, b, a, t, t, t, t, s, u, br)
-#define C4B(p, n, r, g, b, a, t, s, u) \
- TBLENT_B_(p, NV50_SURFACE_FORMAT_##n, r, g, b, a, t, t, t, t, s, u)
-
-#define ZXB(p, n, r, g, b, a, t, s, u) \
- TBLENT_B_(p, NV50_ZETA_FORMAT_##n, \
- r, g, b, ONE_FLOAT, t, UINT, UINT, UINT, s, u)
-#define ZSB(p, n, r, g, b, a, t, s, u) \
- TBLENT_B_(p, NV50_ZETA_FORMAT_##n, \
- r, g, b, ONE_FLOAT, t, UINT, UINT, UINT, s, u)
-#define SZB(p, n, r, g, b, a, t, s, u) \
- TBLENT_B_(p, NV50_ZETA_FORMAT_##n, \
- r, g, b, ONE_FLOAT, UINT, t, UINT, UINT, s, u)
-#define SXB(p, r, s, u) \
- TBLENT_B_(p, NV50_ZETA_FORMAT_NONE, \
- r, r, r, r, UINT, UINT, UINT, UINT, s, u)
-
-#define F3A(p, n, r, g, b, a, t, s, u) \
- C4A(p, n, r, g, b, ONE_FLOAT, t, s, u, 0)
-#define I3A(p, n, r, g, b, a, t, s, u) \
- C4A(p, n, r, g, b, ONE_INT, t, s, u, 0)
-#define F3B(p, n, r, g, b, a, t, s, u) \
- C4B(p, n, r, g, b, ONE_FLOAT, t, s, u)
-#define I3B(p, n, r, g, b, a, t, s, u) \
- C4B(p, n, r, g, b, ONE_INT, t, s, u)
-
-#define F2A(p, n, r, g, b, a, t, s, u) \
- C4A(p, n, r, g, ZERO, ONE_FLOAT, t, s, u, 0)
-#define I2A(p, n, r, g, b, a, t, s, u) \
- C4A(p, n, r, g, ZERO, ONE_INT, t, s, u, 0)
-#define F2B(p, n, r, g, b, a, t, s, u) \
- C4B(p, n, r, g, ZERO, ONE_FLOAT, t, s, u)
-#define I2B(p, n, r, g, b, a, t, s, u) \
- C4B(p, n, r, g, ZERO, ONE_INT, t, s, u)
-
-#define F1A(p, n, r, g, b, a, t, s, u) \
- C4A(p, n, r, ZERO, ZERO, ONE_FLOAT, t, s, u, 0)
-#define I1A(p, n, r, g, b, a, t, s, u) \
- C4A(p, n, r, ZERO, ZERO, ONE_INT, t, s, u, 0)
-#define F1B(p, n, r, g, b, a, t, s, u) \
- C4B(p, n, r, ZERO, ZERO, ONE_FLOAT, t, s, u)
-#define I1B(p, n, r, g, b, a, t, s, u) \
- C4B(p, n, r, ZERO, ZERO, ONE_INT, t, s, u)
-
-#define A1B(p, n, r, g, b, a, t, s, u) \
- C4B(p, n, ZERO, ZERO, ZERO, a, t, s, u)
-
#if NOUVEAU_DRIVER == 0xc0
-const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] =
+const struct nvc0_vertex_format nvc0_vertex_format[PIPE_FORMAT_COUNT] =
#else
-const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
+const struct nv50_vertex_format nv50_vertex_format[PIPE_FORMAT_COUNT] =
#endif
{
- C4A(B8G8R8A8_UNORM, BGRA8_UNORM, C2, C1, C0, C3, UNORM, 8_8_8_8, TDV, 1),
- F3A(B8G8R8X8_UNORM, BGRX8_UNORM, C2, C1, C0, xx, UNORM, 8_8_8_8, TD),
- C4A(B8G8R8A8_SRGB, BGRA8_SRGB, C2, C1, C0, C3, UNORM, 8_8_8_8, TD, 1),
- F3A(B8G8R8X8_SRGB, BGRX8_SRGB, C2, C1, C0, xx, UNORM, 8_8_8_8, TD),
- C4A(R8G8B8A8_UNORM, RGBA8_UNORM, C0, C1, C2, C3, UNORM, 8_8_8_8, IBV, 0),
- F3A(R8G8B8X8_UNORM, RGBX8_UNORM, C0, C1, C2, xx, UNORM, 8_8_8_8, TB),
- C4A(R8G8B8A8_SRGB, RGBA8_SRGB, C0, C1, C2, C3, UNORM, 8_8_8_8, TB, 0),
- F3B(R8G8B8X8_SRGB, RGBX8_SRGB, C0, C1, C2, xx, UNORM, 8_8_8_8, TB),
-
- ZXB(Z16_UNORM, Z16_UNORM, C0, C0, C0, xx, UNORM, Z16, TZ),
- ZXB(Z32_FLOAT, Z32_FLOAT, C0, C0, C0, xx, FLOAT, Z32, TZ),
- ZXB(Z24X8_UNORM, Z24_X8_UNORM, C0, C0, C0, xx, UNORM, Z24_X8, TZ),
- SZB(X8Z24_UNORM, S8_Z24_UNORM, C1, C1, C1, xx, UNORM, S8_Z24, TZ),
- ZSB(Z24_UNORM_S8_UINT, Z24_S8_UNORM, C0, C0, C0, xx, UNORM, Z24_S8, TZ),
- SZB(S8_UINT_Z24_UNORM, S8_Z24_UNORM, C1, C1, C1, xx, UNORM, S8_Z24, TZ),
- ZSB(Z32_FLOAT_S8X24_UINT, Z32_S8_X24_FLOAT, C0, C0, C0, xx, FLOAT,
- Z32_S8_X24, TZ),
-
- SXB(S8_UINT, C0, 8, T),
- SXB(X24S8_UINT, C1, Z24_S8, T),
- SXB(S8X24_UINT, C0, S8_Z24, T),
- SXB(X32_S8X24_UINT, C1, Z32_S8_X24, T),
-
- F3B(B5G6R5_UNORM, B5G6R5_UNORM, C2, C1, C0, xx, UNORM, 5_6_5, TD),
- C4B(B5G5R5A1_UNORM, BGR5_A1_UNORM, C2, C1, C0, C3, UNORM, 5_5_5_1, TD),
- F3B(B5G5R5X1_UNORM, BGR5_X1_UNORM, C2, C1, C0, xx, UNORM, 5_5_5_1, TD),
- C4B(B4G4R4A4_UNORM, NONE, C2, C1, C0, C3, UNORM, 4_4_4_4, T),
- F3B(B4G4R4X4_UNORM, NONE, C2, C1, C0, xx, UNORM, 4_4_4_4, T),
- F3B(R9G9B9E5_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 9_9_9_E5, T),
-
- C4A(R10G10B10A2_UNORM, RGB10_A2_UNORM, C0, C1, C2, C3, UNORM, 10_10_10_2,
- IBV, 0),
- C4A(B10G10R10A2_UNORM, BGR10_A2_UNORM, C2, C1, C0, C3, UNORM, 10_10_10_2,
- TDV, 1),
- C4A(R10G10B10A2_SNORM, NONE, C0, C1, C2, C3, SNORM, 10_10_10_2, TV, 0),
- C4A(B10G10R10A2_SNORM, NONE, C2, C1, C0, C3, SNORM, 10_10_10_2, TV, 1),
- C4A(R10G10B10A2_UINT, RGB10_A2_UINT, C0, C1, C2, C3, UINT, 10_10_10_2, TRV, 0),
- C4A(B10G10R10A2_UINT, RGB10_A2_UINT, C2, C1, C0, C3, UINT, 10_10_10_2, TV, 0),
-
- F3A(R11G11B10_FLOAT, R11G11B10_FLOAT, C0, C1, C2, xx, FLOAT, 11_11_10, IBV),
-
- F3B(L8_UNORM, R8_UNORM, C0, C0, C0, xx, UNORM, 8, TB),
- F3B(L8_SRGB, R8_UNORM, C0, C0, C0, xx, UNORM, 8, TB),
- F3B(L8_SNORM, R8_SNORM, C0, C0, C0, xx, SNORM, 8, TC),
- I3B(L8_SINT, R8_SINT, C0, C0, C0, xx, SINT, 8, TR),
- I3B(L8_UINT, R8_UINT, C0, C0, C0, xx, UINT, 8, TR),
- F3B(L16_UNORM, R16_UNORM, C0, C0, C0, xx, UNORM, 16, TC),
- F3B(L16_SNORM, R16_SNORM, C0, C0, C0, xx, SNORM, 16, TC),
- F3B(L16_FLOAT, R16_FLOAT, C0, C0, C0, xx, FLOAT, 16, TB),
- I3B(L16_SINT, R16_SINT, C0, C0, C0, xx, SINT, 16, TR),
- I3B(L16_UINT, R16_UINT, C0, C0, C0, xx, UINT, 16, TR),
- F3B(L32_FLOAT, R32_FLOAT, C0, C0, C0, xx, FLOAT, 32, TB),
- I3B(L32_SINT, R32_SINT, C0, C0, C0, xx, SINT, 32, TR),
- I3B(L32_UINT, R32_UINT, C0, C0, C0, xx, UINT, 32, TR),
-
- C4B(I8_UNORM, R8_UNORM, C0, C0, C0, C0, UNORM, 8, TR),
- C4B(I8_SNORM, R8_SNORM, C0, C0, C0, C0, SNORM, 8, TR),
- C4B(I8_SINT, R8_SINT, C0, C0, C0, C0, SINT, 8, TR),
- C4B(I8_UINT, R8_UINT, C0, C0, C0, C0, UINT, 8, TR),
- C4B(I16_UNORM, R16_UNORM, C0, C0, C0, C0, UNORM, 16, TR),
- C4B(I16_SNORM, R16_SNORM, C0, C0, C0, C0, SNORM, 16, TR),
- C4B(I16_FLOAT, R16_FLOAT, C0, C0, C0, C0, FLOAT, 16, TR),
- C4B(I16_SINT, R16_SINT, C0, C0, C0, C0, SINT, 16, TR),
- C4B(I16_UINT, R16_UINT, C0, C0, C0, C0, UINT, 16, TR),
- C4B(I32_FLOAT, R32_FLOAT, C0, C0, C0, C0, FLOAT, 32, TR),
- C4B(I32_SINT, R32_SINT, C0, C0, C0, C0, SINT, 32, TR),
- C4B(I32_UINT, R32_UINT, C0, C0, C0, C0, UINT, 32, TR),
-
- A1B(A8_UNORM, A8_UNORM, xx, xx, xx, C0, UNORM, 8, TB),
- A1B(A8_SNORM, R8_SNORM, xx, xx, xx, C0, SNORM, 8, T),
- A1B(A8_SINT, R8_SINT, xx, xx, xx, C0, SINT, 8, T),
- A1B(A8_UINT, R8_UINT, xx, xx, xx, C0, UINT, 8, T),
- A1B(A16_UNORM, R16_UNORM, xx, xx, xx, C0, UNORM, 16, T),
- A1B(A16_SNORM, R16_SNORM, xx, xx, xx, C0, SNORM, 16, T),
- A1B(A16_FLOAT, R16_FLOAT, xx, xx, xx, C0, FLOAT, 16, T),
- A1B(A16_SINT, R16_SINT, xx, xx, xx, C0, SINT, 16, T),
- A1B(A16_UINT, R16_UINT, xx, xx, xx, C0, UINT, 16, T),
- A1B(A32_FLOAT, R32_FLOAT, xx, xx, xx, C0, FLOAT, 32, T),
- A1B(A32_SINT, R32_SINT, xx, xx, xx, C0, SINT, 32, T),
- A1B(A32_UINT, R32_UINT, xx, xx, xx, C0, UINT, 32, T),
-
- C4B(L4A4_UNORM, NONE, C0, C0, C0, C1, UNORM, 4_4, T),
- C4B(L8A8_UNORM, RG8_UNORM, C0, C0, C0, C1, UNORM, 8_8, T),
- C4B(L8A8_SNORM, RG8_SNORM, C0, C0, C0, C1, SNORM, 8_8, T),
- C4B(L8A8_SRGB, RG8_UNORM, C0, C0, C0, C1, UNORM, 8_8, T),
- C4B(L8A8_SINT, RG8_SINT, C0, C0, C0, C1, SINT, 8_8, T),
- C4B(L8A8_UINT, RG8_UINT, C0, C0, C0, C1, UINT, 8_8, T),
- C4B(L16A16_UNORM, RG16_UNORM, C0, C0, C0, C1, UNORM, 16_16, T),
- C4B(L16A16_SNORM, RG16_SNORM, C0, C0, C0, C1, SNORM, 16_16, T),
- C4B(L16A16_FLOAT, RG16_FLOAT, C0, C0, C0, C1, FLOAT, 16_16, T),
- C4B(L16A16_SINT, RG16_SINT, C0, C0, C0, C1, SINT, 16_16, T),
- C4B(L16A16_UINT, RG16_UINT, C0, C0, C0, C1, UINT, 16_16, T),
- C4B(L32A32_FLOAT, RG32_FLOAT, C0, C0, C0, C1, FLOAT, 32_32, T),
- C4B(L32A32_SINT, RG32_SINT, C0, C0, C0, C1, SINT, 32_32, T),
- C4B(L32A32_UINT, RG32_UINT, C0, C0, C0, C1, UINT, 32_32, T),
-
- F3B(DXT1_RGB, NONE, C0, C1, C2, xx, UNORM, DXT1, T),
- F3B(DXT1_SRGB, NONE, C0, C1, C2, xx, UNORM, DXT1, T),
- C4B(DXT1_RGBA, NONE, C0, C1, C2, C3, UNORM, DXT1, T),
- C4B(DXT1_SRGBA, NONE, C0, C1, C2, C3, UNORM, DXT1, T),
- C4B(DXT3_RGBA, NONE, C0, C1, C2, C3, UNORM, DXT3, T),
- C4B(DXT3_SRGBA, NONE, C0, C1, C2, C3, UNORM, DXT3, T),
- C4B(DXT5_RGBA, NONE, C0, C1, C2, C3, UNORM, DXT5, T),
- C4B(DXT5_SRGBA, NONE, C0, C1, C2, C3, UNORM, DXT5, T),
-
- F1B(RGTC1_UNORM, NONE, C0, xx, xx, xx, UNORM, RGTC1, T),
- F1B(RGTC1_SNORM, NONE, C0, xx, xx, xx, SNORM, RGTC1, T),
- F2B(RGTC2_UNORM, NONE, C0, C1, xx, xx, UNORM, RGTC2, T),
- F2B(RGTC2_SNORM, NONE, C0, C1, xx, xx, SNORM, RGTC2, T),
- F3B(LATC1_UNORM, NONE, C0, C0, C0, xx, UNORM, RGTC1, T),
- F3B(LATC1_SNORM, NONE, C0, C0, C0, xx, SNORM, RGTC1, T),
- C4B(LATC2_UNORM, NONE, C0, C0, C0, C1, UNORM, RGTC2, T),
- C4B(LATC2_SNORM, NONE, C0, C0, C0, C1, SNORM, RGTC2, T),
-
- C4B(BPTC_RGBA_UNORM, NONE, C0, C1, C2, C3, UNORM, BPTC, t),
- C4B(BPTC_SRGBA, NONE, C0, C1, C2, C3, UNORM, BPTC, t),
- F3B(BPTC_RGB_FLOAT, NONE, C0, C1, C2, xx, FLOAT, BPTC_FLOAT, t),
- F3B(BPTC_RGB_UFLOAT, NONE, C0, C1, C2, xx, FLOAT, BPTC_UFLOAT, t),
-
- C4A(R32G32B32A32_FLOAT, RGBA32_FLOAT, C0, C1, C2, C3, FLOAT, 32_32_32_32,
- IBV, 0),
- C4A(R32G32B32A32_UNORM, NONE, C0, C1, C2, C3, UNORM, 32_32_32_32, TV, 0),
- C4A(R32G32B32A32_SNORM, NONE, C0, C1, C2, C3, SNORM, 32_32_32_32, TV, 0),
- C4A(R32G32B32A32_SINT, RGBA32_SINT, C0, C1, C2, C3, SINT, 32_32_32_32,
- IRV, 0),
- C4A(R32G32B32A32_UINT, RGBA32_UINT, C0, C1, C2, C3, UINT, 32_32_32_32,
- IRV, 0),
- F3B(R32G32B32X32_FLOAT, RGBX32_FLOAT, C0, C1, C2, xx, FLOAT, 32_32_32_32, TB),
- I3B(R32G32B32X32_SINT, RGBX32_SINT, C0, C1, C2, xx, SINT, 32_32_32_32, TR),
- I3B(R32G32B32X32_UINT, RGBX32_UINT, C0, C1, C2, xx, UINT, 32_32_32_32, TR),
-
- F2A(R32G32_FLOAT, RG32_FLOAT, C0, C1, xx, xx, FLOAT, 32_32, IBV),
- F2A(R32G32_UNORM, NONE, C0, C1, xx, xx, UNORM, 32_32, TV),
- F2A(R32G32_SNORM, NONE, C0, C1, xx, xx, SNORM, 32_32, TV),
- I2A(R32G32_SINT, RG32_SINT, C0, C1, xx, xx, SINT, 32_32, IRV),
- I2A(R32G32_UINT, RG32_UINT, C0, C1, xx, xx, UINT, 32_32, IRV),
-
- F1A(R32_FLOAT, R32_FLOAT, C0, xx, xx, xx, FLOAT, 32, IBV),
- F1A(R32_UNORM, NONE, C0, xx, xx, xx, UNORM, 32, TV),
- F1A(R32_SNORM, NONE, C0, xx, xx, xx, SNORM, 32, TV),
- I1A(R32_SINT, R32_SINT, C0, xx, xx, xx, SINT, 32, IRV),
- I1A(R32_UINT, R32_UINT, C0, xx, xx, xx, UINT, 32, IRV),
-
- C4A(R16G16B16A16_FLOAT, RGBA16_FLOAT, C0, C1, C2, C3, FLOAT, 16_16_16_16,
- IBV, 0),
- C4A(R16G16B16A16_UNORM, RGBA16_UNORM, C0, C1, C2, C3, UNORM, 16_16_16_16,
- ICV, 0),
- C4A(R16G16B16A16_SNORM, RGBA16_SNORM, C0, C1, C2, C3, SNORM, 16_16_16_16,
- ICV, 0),
- C4A(R16G16B16A16_SINT, RGBA16_SINT, C0, C1, C2, C3, SINT, 16_16_16_16,
- IRV, 0),
- C4A(R16G16B16A16_UINT, RGBA16_UINT, C0, C1, C2, C3, UINT, 16_16_16_16,
- IRV, 0),
- F3B(R16G16B16X16_FLOAT, RGBX16_FLOAT, C0, C1, C2, xx, FLOAT, 16_16_16_16, TB),
- F3B(R16G16B16X16_UNORM, RGBA16_UNORM, C0, C1, C2, xx, UNORM, 16_16_16_16, T),
- F3B(R16G16B16X16_SNORM, RGBA16_SNORM, C0, C1, C2, xx, SNORM, 16_16_16_16, T),
- I3B(R16G16B16X16_SINT, RGBA16_SINT, C0, C1, C2, xx, SINT, 16_16_16_16, T),
- I3B(R16G16B16X16_UINT, RGBA16_UINT, C0, C1, C2, xx, UINT, 16_16_16_16, T),
-
- F2A(R16G16_FLOAT, RG16_FLOAT, C0, C1, xx, xx, FLOAT, 16_16, IBV),
- F2A(R16G16_UNORM, RG16_UNORM, C0, C1, xx, xx, UNORM, 16_16, ICV),
- F2A(R16G16_SNORM, RG16_SNORM, C0, C1, xx, xx, SNORM, 16_16, ICV),
- I2A(R16G16_SINT, RG16_SINT, C0, C1, xx, xx, SINT, 16_16, IRV),
- I2A(R16G16_UINT, RG16_UINT, C0, C1, xx, xx, UINT, 16_16, IRV),
-
- F1A(R16_FLOAT, R16_FLOAT, C0, xx, xx, xx, FLOAT, 16, IBV),
- F1A(R16_UNORM, R16_UNORM, C0, xx, xx, xx, UNORM, 16, ICV),
- F1A(R16_SNORM, R16_SNORM, C0, xx, xx, xx, SNORM, 16, ICV),
- I1A(R16_SINT, R16_SINT, C0, xx, xx, xx, SINT, 16, IRV),
- I1A(R16_UINT, R16_UINT, C0, xx, xx, xx, UINT, 16, IRV),
-
- C4A(R8G8B8A8_SNORM, RGBA8_SNORM, C0, C1, C2, C3, SNORM, 8_8_8_8, ICV, 0),
- C4A(R8G8B8A8_SINT, RGBA8_SINT, C0, C1, C2, C3, SINT, 8_8_8_8, IRV, 0),
- C4A(R8G8B8A8_UINT, RGBA8_UINT, C0, C1, C2, C3, UINT, 8_8_8_8, IRV, 0),
- F3B(R8G8B8X8_SNORM, RGBA8_SNORM, C0, C1, C2, xx, SNORM, 8_8_8_8, T),
- I3B(R8G8B8X8_SINT, RGBA8_SINT, C0, C1, C2, xx, SINT, 8_8_8_8, T),
- I3B(R8G8B8X8_UINT, RGBA8_UINT, C0, C1, C2, xx, UINT, 8_8_8_8, T),
-
- F2A(R8G8_UNORM, RG8_UNORM, C0, C1, xx, xx, UNORM, 8_8, IBV),
- F2A(R8G8_SNORM, RG8_SNORM, C0, C1, xx, xx, SNORM, 8_8, ICV),
- I2A(R8G8_SINT, RG8_SINT, C0, C1, xx, xx, SINT, 8_8, IRV),
- I2A(R8G8_UINT, RG8_UINT, C0, C1, xx, xx, UINT, 8_8, IRV),
-
- F1A(R8_UNORM, R8_UNORM, C0, xx, xx, xx, UNORM, 8, IBV),
- F1A(R8_SNORM, R8_SNORM, C0, xx, xx, xx, SNORM, 8, ICV),
- I1A(R8_SINT, R8_SINT, C0, xx, xx, xx, SINT, 8, IRV),
- I1A(R8_UINT, R8_UINT, C0, xx, xx, xx, UINT, 8, IRV),
-
- F3B(R8G8_B8G8_UNORM, NONE, C0, C1, C2, xx, UNORM, U8_YA8_V8_YB8, T),
- F3B(G8R8_B8R8_UNORM, NONE, C1, C0, C2, xx, UNORM, U8_YA8_V8_YB8, T),
- F3B(G8R8_G8B8_UNORM, NONE, C0, C1, C2, xx, UNORM, YA8_U8_YB8_V8, T),
- F3B(R8G8_R8B8_UNORM, NONE, C1, C0, C2, xx, UNORM, YA8_U8_YB8_V8, T),
-
- F1B(R1_UNORM, BITMAP, C0, xx, xx, xx, UNORM, BITMAP, T),
-
- C4B(R4A4_UNORM, NONE, C0, ZERO, ZERO, C1, UNORM, 4_4, T),
- C4B(R8A8_UNORM, NONE, C0, ZERO, ZERO, C1, UNORM, 8_8, T),
- C4B(A4R4_UNORM, NONE, C1, ZERO, ZERO, C0, UNORM, 4_4, T),
- C4B(A8R8_UNORM, NONE, C1, ZERO, ZERO, C0, UNORM, 8_8, T),
-
- TBLENT_B_(R8SG8SB8UX8U_NORM, 0,
- C0, C1, C2, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, 8_8_8_8, T),
- TBLENT_B_(R5SG5SB6U_NORM, 0,
- C0, C1, C2, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, 5_5_6, T),
-
- /* vertex-only formats: */
-
- C4A(R32G32B32A32_SSCALED, NONE, C0, C1, C2, C3, SSCALED, 32_32_32_32, V, 0),
- C4A(R32G32B32A32_USCALED, NONE, C0, C1, C2, C3, USCALED, 32_32_32_32, V, 0),
- F3A(R32G32B32_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 32_32_32, tV),
- F3A(R32G32B32_UNORM, NONE, C0, C1, C2, xx, UNORM, 32_32_32, V),
- F3A(R32G32B32_SNORM, NONE, C0, C1, C2, xx, SNORM, 32_32_32, V),
- I3A(R32G32B32_SINT, NONE, C0, C1, C2, xx, SINT, 32_32_32, tV),
- I3A(R32G32B32_UINT, NONE, C0, C1, C2, xx, UINT, 32_32_32, tV),
- F3A(R32G32B32_SSCALED, NONE, C0, C1, C2, xx, SSCALED, 32_32_32, V),
- F3A(R32G32B32_USCALED, NONE, C0, C1, C2, xx, USCALED, 32_32_32, V),
- F2A(R32G32_SSCALED, NONE, C0, C1, xx, xx, SSCALED, 32_32, V),
- F2A(R32G32_USCALED, NONE, C0, C1, xx, xx, USCALED, 32_32, V),
- F1A(R32_SSCALED, NONE, C0, xx, xx, xx, SSCALED, 32, V),
- F1A(R32_USCALED, NONE, C0, xx, xx, xx, USCALED, 32, V),
-
- C4A(R16G16B16A16_SSCALED, NONE, C0, C1, C2, C3, SSCALED, 16_16_16_16, V, 0),
- C4A(R16G16B16A16_USCALED, NONE, C0, C1, C2, C3, USCALED, 16_16_16_16, V, 0),
- F3A(R16G16B16_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 16_16_16, V),
- F3A(R16G16B16_UNORM, NONE, C0, C1, C2, xx, UNORM, 16_16_16, V),
- F3A(R16G16B16_SNORM, NONE, C0, C1, C2, xx, SNORM, 16_16_16, V),
- I3A(R16G16B16_SINT, NONE, C0, C1, C2, xx, SINT, 16_16_16, V),
- I3A(R16G16B16_UINT, NONE, C0, C1, C2, xx, UINT, 16_16_16, V),
- F3A(R16G16B16_SSCALED, NONE, C0, C1, C2, xx, SSCALED, 16_16_16, V),
- F3A(R16G16B16_USCALED, NONE, C0, C1, C2, xx, USCALED, 16_16_16, V),
- F2A(R16G16_SSCALED, NONE, C0, C1, xx, xx, SSCALED, 16_16, V),
- F2A(R16G16_USCALED, NONE, C0, C1, xx, xx, USCALED, 16_16, V),
- F1A(R16_SSCALED, NONE, C0, xx, xx, xx, SSCALED, 16, V),
- F1A(R16_USCALED, NONE, C0, xx, xx, xx, USCALED, 16, V),
-
- C4A(R10G10B10A2_USCALED, NONE, C0, C1, C2, C3, USCALED, 10_10_10_2, V, 0),
- C4A(R10G10B10A2_SSCALED, NONE, C0, C1, C2, C3, SSCALED, 10_10_10_2, V, 0),
- C4A(B10G10R10A2_USCALED, NONE, C0, C1, C2, C3, USCALED, 10_10_10_2, V, 1),
- C4A(B10G10R10A2_SSCALED, NONE, C0, C1, C2, C3, SSCALED, 10_10_10_2, V, 1),
-
- C4A(R8G8B8A8_SSCALED, NONE, C0, C1, C2, C3, SSCALED, 8_8_8_8, V, 0),
- C4A(R8G8B8A8_USCALED, NONE, C0, C1, C2, C3, USCALED, 8_8_8_8, V, 0),
- F3A(R8G8B8_UNORM, NONE, C0, C1, C2, xx, UNORM, 8_8_8, V),
- F3A(R8G8B8_SNORM, NONE, C0, C1, C2, xx, SNORM, 8_8_8, V),
- I2A(R8G8B8_SINT, NONE, C0, C1, C2, xx, SINT, 8_8_8, V),
- I2A(R8G8B8_UINT, NONE, C0, C1, C2, xx, UINT, 8_8_8, V),
- F3A(R8G8B8_SSCALED, NONE, C0, C1, C2, xx, SSCALED, 8_8_8, V),
- F3A(R8G8B8_USCALED, NONE, C0, C1, C2, xx, USCALED, 8_8_8, V),
- F2A(R8G8_SSCALED, NONE, C0, C1, xx, xx, SSCALED, 8_8, V),
- F2A(R8G8_USCALED, NONE, C0, C1, xx, xx, USCALED, 8_8, V),
- F1A(R8_SSCALED, NONE, C0, xx, xx, xx, SSCALED, 8, V),
- F1A(R8_USCALED, NONE, C0, xx, xx, xx, USCALED, 8, V),
+ VF(A, B8G8R8A8_UNORM, UNORM, 8_8_8_8, 1),
+ VF(A, R8G8B8A8_UNORM, UNORM, 8_8_8_8, 0),
+
+ VF(A, R10G10B10A2_UNORM, UNORM, 10_10_10_2, 0),
+ VF(A, B10G10R10A2_UNORM, UNORM, 10_10_10_2, 1),
+ VF(A, R10G10B10A2_SNORM, SNORM, 10_10_10_2, 0),
+ VF(A, B10G10R10A2_SNORM, SNORM, 10_10_10_2, 1),
+ VF(A, R10G10B10A2_UINT, UINT, 10_10_10_2, 0),
+ VF(A, B10G10R10A2_UINT, UINT, 10_10_10_2, 0),
+
+ VF(A, R11G11B10_FLOAT, FLOAT, 11_11_10, 0),
+
+ VF(A, R32G32B32A32_FLOAT, FLOAT, 32_32_32_32, 0),
+ VF(A, R32G32B32A32_UNORM, UNORM, 32_32_32_32, 0),
+ VF(A, R32G32B32A32_SNORM, SNORM, 32_32_32_32, 0),
+ VF(A, R32G32B32A32_SINT, SINT, 32_32_32_32, 0),
+ VF(A, R32G32B32A32_UINT, UINT, 32_32_32_32, 0),
+
+ VF(A, R32G32_FLOAT, FLOAT, 32_32, 0),
+ VF(A, R32G32_UNORM, UNORM, 32_32, 0),
+ VF(A, R32G32_SNORM, SNORM, 32_32, 0),
+ VF(A, R32G32_SINT, SINT, 32_32, 0),
+ VF(A, R32G32_UINT, UINT, 32_32, 0),
+
+ VF(A, R32_FLOAT, FLOAT, 32, 0),
+ VF(A, R32_UNORM, UNORM, 32, 0),
+ VF(A, R32_SNORM, SNORM, 32, 0),
+ VF(A, R32_SINT, SINT, 32, 0),
+ VF(A, R32_UINT, UINT, 32, 0),
+
+ VF(A, R16G16B16A16_FLOAT, FLOAT, 16_16_16_16, 0),
+ VF(A, R16G16B16A16_UNORM, UNORM, 16_16_16_16, 0),
+ VF(A, R16G16B16A16_SNORM, SNORM, 16_16_16_16, 0),
+ VF(A, R16G16B16A16_SINT, SINT, 16_16_16_16, 0),
+ VF(A, R16G16B16A16_UINT, UINT, 16_16_16_16, 0),
+
+ VF(A, R16G16_FLOAT, FLOAT, 16_16, 0),
+ VF(A, R16G16_UNORM, UNORM, 16_16, 0),
+ VF(A, R16G16_SNORM, SNORM, 16_16, 0),
+ VF(A, R16G16_SINT, SINT, 16_16, 0),
+ VF(A, R16G16_UINT, UINT, 16_16, 0),
+
+ VF(A, R16_FLOAT, FLOAT, 16, 0),
+ VF(A, R16_UNORM, UNORM, 16, 0),
+ VF(A, R16_SNORM, SNORM, 16, 0),
+ VF(A, R16_SINT, SINT, 16, 0),
+ VF(A, R16_UINT, UINT, 16, 0),
+
+ VF(A, R8G8B8A8_SNORM, SNORM, 8_8_8_8, 0),
+ VF(A, R8G8B8A8_SINT, SINT, 8_8_8_8, 0),
+ VF(A, R8G8B8A8_UINT, UINT, 8_8_8_8, 0),
+
+ VF(A, R8G8_UNORM, UNORM, 8_8, 0),
+ VF(A, R8G8_SNORM, SNORM, 8_8, 0),
+ VF(A, R8G8_SINT, SINT, 8_8, 0),
+ VF(A, R8G8_UINT, UINT, 8_8, 0),
+
+ VF(A, R8_UNORM, UNORM, 8, 0),
+ VF(A, R8_SNORM, SNORM, 8, 0),
+ VF(A, R8_SINT, SINT, 8, 0),
+ VF(A, R8_UINT, UINT, 8, 0),
+
+ VF(A, R32G32B32A32_SSCALED, SSCALED, 32_32_32_32, 0),
+ VF(A, R32G32B32A32_USCALED, USCALED, 32_32_32_32, 0),
+ VF(A, R32G32B32_FLOAT, FLOAT, 32_32_32, 0),
+ VF(A, R32G32B32_UNORM, UNORM, 32_32_32, 0),
+ VF(A, R32G32B32_SNORM, SNORM, 32_32_32, 0),
+ VF(A, R32G32B32_SINT, SINT, 32_32_32, 0),
+ VF(A, R32G32B32_UINT, UINT, 32_32_32, 0),
+ VF(A, R32G32B32_SSCALED, SSCALED, 32_32_32, 0),
+ VF(A, R32G32B32_USCALED, USCALED, 32_32_32, 0),
+ VF(A, R32G32_SSCALED, SSCALED, 32_32, 0),
+ VF(A, R32G32_USCALED, USCALED, 32_32, 0),
+ VF(A, R32_SSCALED, SSCALED, 32, 0),
+ VF(A, R32_USCALED, USCALED, 32, 0),
+
+ VF(A, R16G16B16A16_SSCALED, SSCALED, 16_16_16_16, 0),
+ VF(A, R16G16B16A16_USCALED, USCALED, 16_16_16_16, 0),
+ VF(A, R16G16B16_FLOAT, FLOAT, 16_16_16, 0),
+ VF(A, R16G16B16_UNORM, UNORM, 16_16_16, 0),
+ VF(A, R16G16B16_SNORM, SNORM, 16_16_16, 0),
+ VF(A, R16G16B16_SINT, SINT, 16_16_16, 0),
+ VF(A, R16G16B16_UINT, UINT, 16_16_16, 0),
+ VF(A, R16G16B16_SSCALED, SSCALED, 16_16_16, 0),
+ VF(A, R16G16B16_USCALED, USCALED, 16_16_16, 0),
+ VF(A, R16G16_SSCALED, SSCALED, 16_16, 0),
+ VF(A, R16G16_USCALED, USCALED, 16_16, 0),
+ VF(A, R16_SSCALED, SSCALED, 16, 0),
+ VF(A, R16_USCALED, USCALED, 16, 0),
+
+ VF(A, R10G10B10A2_USCALED, USCALED, 10_10_10_2, 0),
+ VF(A, R10G10B10A2_SSCALED, SSCALED, 10_10_10_2, 0),
+ VF(A, B10G10R10A2_USCALED, USCALED, 10_10_10_2, 1),
+ VF(A, B10G10R10A2_SSCALED, SSCALED, 10_10_10_2, 1),
+
+ VF(A, R8G8B8A8_SSCALED, SSCALED, 8_8_8_8, 0),
+ VF(A, R8G8B8A8_USCALED, USCALED, 8_8_8_8, 0),
+ VF(A, R8G8B8_UNORM, UNORM, 8_8_8, 0),
+ VF(A, R8G8B8_SNORM, SNORM, 8_8_8, 0),
+ VF(A, R8G8B8_SINT, SINT, 8_8_8, 0),
+ VF(A, R8G8B8_UINT, UINT, 8_8_8, 0),
+ VF(A, R8G8B8_SSCALED, SSCALED, 8_8_8, 0),
+ VF(A, R8G8B8_USCALED, USCALED, 8_8_8, 0),
+ VF(A, R8G8_SSCALED, SSCALED, 8_8, 0),
+ VF(A, R8G8_USCALED, USCALED, 8_8, 0),
+ VF(A, R8_SSCALED, SSCALED, 8, 0),
+ VF(A, R8_USCALED, USCALED, 8, 0),
/* FIXED types: not supported natively, converted on VBO push */
- C4B(R32G32B32A32_FIXED, NONE, C0, C1, C2, C3, FLOAT, 32_32_32_32, V),
- F3B(R32G32B32_FIXED, NONE, C0, C1, C2, xx, FLOAT, 32_32_32, V),
- F2B(R32G32_FIXED, NONE, C0, C1, xx, xx, FLOAT, 32_32, V),
- F1B(R32_FIXED, NONE, C0, xx, xx, xx, FLOAT, 32, V),
+ VF(x, R32G32B32A32_FIXED, xx, xx, xx),
+ VF(x, R32G32B32_FIXED, xx, xx, xx),
+ VF(x, R32G32_FIXED, xx, xx, xx),
+ VF(x, R32_FIXED, xx, xx, xx),
- C4B(R64G64B64A64_FLOAT, NONE, C0, C1, C2, C3, FLOAT, 32_32_32_32, V),
- F3B(R64G64B64_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 32_32_32, V),
- F2B(R64G64_FLOAT, NONE, C0, C1, xx, xx, FLOAT, 32_32, V),
- F1B(R64_FLOAT, NONE, C0, xx, xx, xx, FLOAT, 32, V),
+ VF(x, R64G64B64A64_FLOAT, xx, xx, xx),
+ VF(x, R64G64B64_FLOAT, xx, xx, xx),
+ VF(x, R64G64_FLOAT, xx, xx, xx),
+ VF(x, R64_FLOAT, xx, xx, xx),
};
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
index 79c7023b2d4..be19c0fdc85 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
@@ -218,11 +218,12 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
struct pipe_context *pipe = &nv50->base.pipe;
struct nouveau_pushbuf *push = nv50->base.pushbuf;
struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq);
+ struct pipe_grid_info info = {};
uint32_t mask;
uint32_t input[3];
const uint block[3] = { 32, 1, 1 };
const uint grid[3] = { screen->MPsInTP, screen->TPs, 1 };
- int c;
+ int c, i;
if (unlikely(!screen->pm.prog)) {
struct nv50_program *prog = CALLOC_STRUCT(nv50_program);
@@ -262,7 +263,14 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
pipe->bind_compute_state(pipe, screen->pm.prog);
input[0] = hq->bo->offset + hq->base_offset;
input[1] = hq->sequence;
- pipe->launch_grid(pipe, block, grid, 0, input);
+
+ for (i = 0; i < 3; i++) {
+ info.block[i] = block[i];
+ info.grid[i] = grid[i];
+ }
+ info.pc = 0;
+ info.input = input;
+ pipe->launch_grid(pipe, &info);
nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_QUERY);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 14d0085975b..8d11dd7bf21 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -72,7 +72,8 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
PIPE_BIND_TRANSFER_WRITE |
PIPE_BIND_SHARED);
- return (nv50_format_table[format].usage & bindings) == bindings;
+ return (( nv50_format_table[format].usage |
+ nv50_vertex_format[format].usage) & bindings) == bindings;
}
static int
@@ -263,8 +264,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_VERTEX:
case PIPE_SHADER_GEOMETRY:
case PIPE_SHADER_FRAGMENT:
- case PIPE_SHADER_COMPUTE:
break;
+ case PIPE_SHADER_COMPUTE:
default:
return 0;
}
@@ -315,6 +316,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
@@ -562,7 +565,7 @@ nv50_screen_init_hwctx(struct nv50_screen *screen)
if (screen->tesla->oclass >= NVA0_3D_CLASS) {
BEGIN_NV04(push, SUBC_3D(NVA0_3D_TEX_MISC), 1);
- PUSH_DATA (push, NVA0_3D_TEX_MISC_SEAMLESS_CUBE_MAP);
+ PUSH_DATA (push, 0);
}
BEGIN_NV04(push, NV50_3D(SCREEN_Y_CONTROL), 1);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
index 2a4983d1020..cce92f0dd5e 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
@@ -50,6 +50,7 @@ struct nv50_graph_state {
uint8_t num_samplers[3];
uint8_t prim_size;
uint16_t scissor;
+ bool seamless_cube_map;
};
struct nv50_screen {
@@ -156,12 +157,27 @@ nv50_resource_validate(struct nv04_resource *res, uint32_t flags)
struct nv50_format {
uint32_t rt;
- uint32_t tic;
+ struct {
+ unsigned format:6;
+ unsigned type_r:3;
+ unsigned type_g:3;
+ unsigned type_b:3;
+ unsigned type_a:3;
+ unsigned src_x:3;
+ unsigned src_y:3;
+ unsigned src_z:3;
+ unsigned src_w:3;
+ } tic;
+ uint32_t usage;
+};
+
+struct nv50_vertex_format {
uint32_t vtx;
uint32_t usage;
};
extern const struct nv50_format nv50_format_table[];
+extern const struct nv50_vertex_format nv50_vertex_format[];
static inline void
nv50_screen_tic_unlock(struct nv50_screen *screen, struct nv50_tic_entry *tic)
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c
index cb040439139..6a09808807a 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -21,6 +21,7 @@
*/
#include "pipe/p_defines.h"
+#include "util/u_framebuffer.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
#include "util/u_transfer.h"
@@ -33,7 +34,7 @@
#include "nv50/nv50_query_hw.h"
#include "nv50/nv50_3d.xml.h"
-#include "nv50/nv50_texture.xml.h"
+#include "nv50/g80_texture.xml.h"
#include "nouveau_gldefs.h"
@@ -437,24 +438,29 @@ nv50_zsa_state_delete(struct pipe_context *pipe, void *hwcso)
/* ====================== SAMPLERS AND TEXTURES ================================
*/
-#define NV50_TSC_WRAP_CASE(n) \
- case PIPE_TEX_WRAP_##n: return NV50_TSC_WRAP_##n
-
static inline unsigned
nv50_tsc_wrap_mode(unsigned wrap)
{
switch (wrap) {
- NV50_TSC_WRAP_CASE(REPEAT);
- NV50_TSC_WRAP_CASE(MIRROR_REPEAT);
- NV50_TSC_WRAP_CASE(CLAMP_TO_EDGE);
- NV50_TSC_WRAP_CASE(CLAMP_TO_BORDER);
- NV50_TSC_WRAP_CASE(CLAMP);
- NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_EDGE);
- NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_BORDER);
- NV50_TSC_WRAP_CASE(MIRROR_CLAMP);
+ case PIPE_TEX_WRAP_REPEAT:
+ return G80_TSC_WRAP_WRAP;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ return G80_TSC_WRAP_MIRROR;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ return G80_TSC_WRAP_CLAMP_TO_EDGE;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ return G80_TSC_WRAP_BORDER;
+ case PIPE_TEX_WRAP_CLAMP:
+ return G80_TSC_WRAP_CLAMP_OGL;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ return G80_TSC_WRAP_MIRROR_ONCE_CLAMP_TO_EDGE;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ return G80_TSC_WRAP_MIRROR_ONCE_BORDER;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ return G80_TSC_WRAP_MIRROR_ONCE_CLAMP_OGL;
default:
NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
- return NV50_TSC_WRAP_REPEAT;
+ return G80_TSC_WRAP_WRAP;
}
}
@@ -474,42 +480,44 @@ nv50_sampler_state_create(struct pipe_context *pipe,
switch (cso->mag_img_filter) {
case PIPE_TEX_FILTER_LINEAR:
- so->tsc[1] = NV50_TSC_1_MAGF_LINEAR;
+ so->tsc[1] = G80_TSC_1_MAG_FILTER_LINEAR;
break;
case PIPE_TEX_FILTER_NEAREST:
default:
- so->tsc[1] = NV50_TSC_1_MAGF_NEAREST;
+ so->tsc[1] = G80_TSC_1_MAG_FILTER_NEAREST;
break;
}
switch (cso->min_img_filter) {
case PIPE_TEX_FILTER_LINEAR:
- so->tsc[1] |= NV50_TSC_1_MINF_LINEAR;
+ so->tsc[1] |= G80_TSC_1_MIN_FILTER_LINEAR;
break;
case PIPE_TEX_FILTER_NEAREST:
default:
- so->tsc[1] |= NV50_TSC_1_MINF_NEAREST;
+ so->tsc[1] |= G80_TSC_1_MIN_FILTER_NEAREST;
break;
}
switch (cso->min_mip_filter) {
case PIPE_TEX_MIPFILTER_LINEAR:
- so->tsc[1] |= NV50_TSC_1_MIPF_LINEAR;
+ so->tsc[1] |= G80_TSC_1_MIP_FILTER_LINEAR;
break;
case PIPE_TEX_MIPFILTER_NEAREST:
- so->tsc[1] |= NV50_TSC_1_MIPF_NEAREST;
+ so->tsc[1] |= G80_TSC_1_MIP_FILTER_NEAREST;
break;
case PIPE_TEX_MIPFILTER_NONE:
default:
- so->tsc[1] |= NV50_TSC_1_MIPF_NONE;
+ so->tsc[1] |= G80_TSC_1_MIP_FILTER_NONE;
break;
}
if (nouveau_screen(pipe->screen)->class_3d >= NVE4_3D_CLASS) {
if (cso->seamless_cube_map)
- so->tsc[1] |= NVE4_TSC_1_CUBE_SEAMLESS;
+ so->tsc[1] |= GK104_TSC_1_CUBEMAP_INTERFACE_FILTERING;
if (!cso->normalized_coords)
- so->tsc[1] |= NVE4_TSC_1_FORCE_NONNORMALIZED_COORDS;
+ so->tsc[1] |= GK104_TSC_1_FLOAT_COORD_NORMALIZATION_FORCE_UNNORMALIZED_COORDS;
+ } else {
+ so->seamless_cube_map = cso->seamless_cube_map;
}
if (cso->max_anisotropy >= 16)
@@ -521,10 +529,10 @@ nv50_sampler_state_create(struct pipe_context *pipe,
so->tsc[0] |= (cso->max_anisotropy >> 1) << 20;
if (cso->max_anisotropy >= 4)
- so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_35;
+ so->tsc[1] |= 6 << G80_TSC_1_TRILIN_OPT__SHIFT;
else
if (cso->max_anisotropy >= 2)
- so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_15;
+ so->tsc[1] |= 4 << G80_TSC_1_TRILIN_OPT__SHIFT;
}
if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
@@ -936,21 +944,10 @@ nv50_set_framebuffer_state(struct pipe_context *pipe,
const struct pipe_framebuffer_state *fb)
{
struct nv50_context *nv50 = nv50_context(pipe);
- unsigned i;
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
- for (i = 0; i < fb->nr_cbufs; ++i)
- pipe_surface_reference(&nv50->framebuffer.cbufs[i], fb->cbufs[i]);
- for (; i < nv50->framebuffer.nr_cbufs; ++i)
- pipe_surface_reference(&nv50->framebuffer.cbufs[i], NULL);
-
- nv50->framebuffer.nr_cbufs = fb->nr_cbufs;
-
- nv50->framebuffer.width = fb->width;
- nv50->framebuffer.height = fb->height;
-
- pipe_surface_reference(&nv50->framebuffer.zsbuf, fb->zsbuf);
+ util_copy_framebuffer_state(&nv50->framebuffer, fb);
nv50->dirty |= NV50_NEW_FRAMEBUFFER;
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
index 4af969997f2..55369781606 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
@@ -2,7 +2,6 @@
#include "util/u_format.h"
#include "nv50/nv50_context.h"
-#include "nv50/nv50_defs.xml.h"
static inline void
nv50_fb_set_null_rt(struct nouveau_pushbuf *push, unsigned i)
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h b/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h
index e0793bb6ec4..6bc451450b1 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h
@@ -7,6 +7,7 @@
struct nv50_tsc_entry {
int id;
uint32_t tsc[8];
+ bool seamless_cube_map;
};
static inline struct nv50_tsc_entry *
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
index c69fa5abb98..4db73cb7fef 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -37,8 +37,8 @@
#include "nv50/nv50_context.h"
#include "nv50/nv50_resource.h"
-#include "nv50/nv50_defs.xml.h"
-#include "nv50/nv50_texture.xml.h"
+#include "nv50/g80_defs.xml.h"
+#include "nv50/g80_texture.xml.h"
/* these are used in nv50_blit.h */
#define NV50_ENG2D_SUPPORTED_FORMATS 0xff0843e080608409ULL
@@ -64,15 +64,15 @@ nv50_2d_format(enum pipe_format format, bool dst, bool dst_src_equal)
switch (util_format_get_blocksize(format)) {
case 1:
- return NV50_SURFACE_FORMAT_R8_UNORM;
+ return G80_SURFACE_FORMAT_R8_UNORM;
case 2:
- return NV50_SURFACE_FORMAT_R16_UNORM;
+ return G80_SURFACE_FORMAT_R16_UNORM;
case 4:
- return NV50_SURFACE_FORMAT_BGRA8_UNORM;
+ return G80_SURFACE_FORMAT_BGRA8_UNORM;
case 8:
- return NV50_SURFACE_FORMAT_RGBA16_FLOAT;
+ return G80_SURFACE_FORMAT_RGBA16_FLOAT;
case 16:
- return NV50_SURFACE_FORMAT_RGBA32_FLOAT;
+ return G80_SURFACE_FORMAT_RGBA32_FLOAT;
default:
return 0;
}
@@ -628,7 +628,7 @@ nv50_clear_buffer_push(struct pipe_context *pipe,
offset &= ~0xff;
BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2);
- PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
+ PUSH_DATA (push, G80_SURFACE_FORMAT_R8_UNORM);
PUSH_DATA (push, 1);
BEGIN_NV04(push, NV50_2D(DST_PITCH), 5);
PUSH_DATA (push, 262144);
@@ -638,7 +638,7 @@ nv50_clear_buffer_push(struct pipe_context *pipe,
PUSH_DATA (push, buf->address + offset);
BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2);
PUSH_DATA (push, 0);
- PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
+ PUSH_DATA (push, G80_SURFACE_FORMAT_R8_UNORM);
BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10);
PUSH_DATA (push, size);
PUSH_DATA (push, 1);
@@ -997,12 +997,14 @@ nv50_blitter_make_sampler(struct nv50_blitter *blit)
blit->sampler[0].id = -1;
- blit->sampler[0].tsc[0] = NV50_TSC_0_SRGB_CONVERSION_ALLOWED |
- (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPS__SHIFT) |
- (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPT__SHIFT) |
- (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPR__SHIFT);
+ blit->sampler[0].tsc[0] = G80_TSC_0_SRGB_CONVERSION |
+ (G80_TSC_WRAP_CLAMP_TO_EDGE << G80_TSC_0_ADDRESS_U__SHIFT) |
+ (G80_TSC_WRAP_CLAMP_TO_EDGE << G80_TSC_0_ADDRESS_V__SHIFT) |
+ (G80_TSC_WRAP_CLAMP_TO_EDGE << G80_TSC_0_ADDRESS_P__SHIFT);
blit->sampler[0].tsc[1] =
- NV50_TSC_1_MAGF_NEAREST | NV50_TSC_1_MINF_NEAREST | NV50_TSC_1_MIPF_NONE;
+ G80_TSC_1_MAG_FILTER_NEAREST |
+ G80_TSC_1_MIN_FILTER_NEAREST |
+ G80_TSC_1_MIP_FILTER_NONE;
/* clamp to edge, min/max lod = 0, bilinear filtering */
@@ -1010,7 +1012,9 @@ nv50_blitter_make_sampler(struct nv50_blitter *blit)
blit->sampler[1].tsc[0] = blit->sampler[0].tsc[0];
blit->sampler[1].tsc[1] =
- NV50_TSC_1_MAGF_LINEAR | NV50_TSC_1_MINF_LINEAR | NV50_TSC_1_MIPF_NONE;
+ G80_TSC_1_MAG_FILTER_LINEAR |
+ G80_TSC_1_MIN_FILTER_LINEAR |
+ G80_TSC_1_MIP_FILTER_NONE;
}
unsigned
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_tex.c b/src/gallium/drivers/nouveau/nv50/nv50_tex.c
index c3f433608df..4b69c3bd504 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_tex.c
@@ -22,32 +22,24 @@
#include "nv50/nv50_context.h"
#include "nv50/nv50_resource.h"
-#include "nv50/nv50_texture.xml.h"
-#include "nv50/nv50_defs.xml.h"
+#include "nv50/g80_texture.xml.h"
+#include "nv50/g80_defs.xml.h"
#include "util/u_format.h"
-#define NV50_TIC_0_SWIZZLE__MASK \
- (NV50_TIC_0_MAPA__MASK | NV50_TIC_0_MAPB__MASK | \
- NV50_TIC_0_MAPG__MASK | NV50_TIC_0_MAPR__MASK)
-
static inline uint32_t
-nv50_tic_swizzle(uint32_t tc, unsigned swz, bool tex_int)
+nv50_tic_swizzle(const struct nv50_format *fmt, unsigned swz, bool tex_int)
{
switch (swz) {
- case PIPE_SWIZZLE_RED:
- return (tc & NV50_TIC_0_MAPR__MASK) >> NV50_TIC_0_MAPR__SHIFT;
- case PIPE_SWIZZLE_GREEN:
- return (tc & NV50_TIC_0_MAPG__MASK) >> NV50_TIC_0_MAPG__SHIFT;
- case PIPE_SWIZZLE_BLUE:
- return (tc & NV50_TIC_0_MAPB__MASK) >> NV50_TIC_0_MAPB__SHIFT;
- case PIPE_SWIZZLE_ALPHA:
- return (tc & NV50_TIC_0_MAPA__MASK) >> NV50_TIC_0_MAPA__SHIFT;
+ case PIPE_SWIZZLE_RED : return fmt->tic.src_x;
+ case PIPE_SWIZZLE_GREEN: return fmt->tic.src_y;
+ case PIPE_SWIZZLE_BLUE : return fmt->tic.src_z;
+ case PIPE_SWIZZLE_ALPHA: return fmt->tic.src_w;
case PIPE_SWIZZLE_ONE:
- return tex_int ? NV50_TIC_MAP_ONE_INT : NV50_TIC_MAP_ONE_FLOAT;
+ return tex_int ? G80_TIC_SOURCE_ONE_INT : G80_TIC_SOURCE_ONE_FLOAT;
case PIPE_SWIZZLE_ZERO:
default:
- return NV50_TIC_MAP_ZERO;
+ return G80_TIC_SOURCE_ZERO;
}
}
@@ -73,6 +65,7 @@ nv50_create_texture_view(struct pipe_context *pipe,
{
const uint32_t class_3d = nouveau_context(pipe)->screen->class_3d;
const struct util_format_description *desc;
+ const struct nv50_format *fmt;
uint64_t addr;
uint32_t *tic;
uint32_t swz[4];
@@ -100,19 +93,23 @@ nv50_create_texture_view(struct pipe_context *pipe,
/* TIC[0] */
- tic[0] = nv50_format_table[view->pipe.format].tic;
+ fmt = &nv50_format_table[view->pipe.format];
tex_int = util_format_is_pure_integer(view->pipe.format);
- swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r, tex_int);
- swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g, tex_int);
- swz[2] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_b, tex_int);
- swz[3] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_a, tex_int);
- tic[0] = (tic[0] & ~NV50_TIC_0_SWIZZLE__MASK) |
- (swz[0] << NV50_TIC_0_MAPR__SHIFT) |
- (swz[1] << NV50_TIC_0_MAPG__SHIFT) |
- (swz[2] << NV50_TIC_0_MAPB__SHIFT) |
- (swz[3] << NV50_TIC_0_MAPA__SHIFT);
+ swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int);
+ swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int);
+ swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int);
+ swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int);
+ tic[0] = (fmt->tic.format << G80_TIC_0_COMPONENTS_SIZES__SHIFT) |
+ (fmt->tic.type_r << G80_TIC_0_R_DATA_TYPE__SHIFT) |
+ (fmt->tic.type_g << G80_TIC_0_G_DATA_TYPE__SHIFT) |
+ (fmt->tic.type_b << G80_TIC_0_B_DATA_TYPE__SHIFT) |
+ (fmt->tic.type_a << G80_TIC_0_A_DATA_TYPE__SHIFT) |
+ (swz[0] << G80_TIC_0_X_SOURCE__SHIFT) |
+ (swz[1] << G80_TIC_0_Y_SOURCE__SHIFT) |
+ (swz[2] << G80_TIC_0_Z_SOURCE__SHIFT) |
+ (swz[3] << G80_TIC_0_W_SOURCE__SHIFT);
addr = mt->base.address;
@@ -124,24 +121,24 @@ nv50_create_texture_view(struct pipe_context *pipe,
depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;
}
- tic[2] = 0x10001000 | NV50_TIC_2_NO_BORDER;
+ tic[2] = 0x10001000 | G80_TIC_2_BORDER_SOURCE_COLOR;
if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
- tic[2] |= NV50_TIC_2_COLORSPACE_SRGB;
+ tic[2] |= G80_TIC_2_SRGB_CONVERSION;
if (!(flags & NV50_TEXVIEW_SCALED_COORDS))
- tic[2] |= NV50_TIC_2_NORMALIZED_COORDS;
+ tic[2] |= G80_TIC_2_NORMALIZED_COORDS;
if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) {
if (target == PIPE_BUFFER) {
addr += view->pipe.u.buf.first_element * desc->block.bits / 8;
- tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_BUFFER;
+ tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_ONE_D_BUFFER;
tic[3] = 0;
tic[4] = /* width */
view->pipe.u.buf.last_element - view->pipe.u.buf.first_element + 1;
tic[5] = 0;
} else {
- tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_RECT;
+ tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_TWO_D_NO_MIPMAP;
tic[3] = mt->level[0].pitch;
tic[4] = mt->base.base.width0;
tic[5] = (1 << 16) | (mt->base.base.height0);
@@ -162,34 +159,34 @@ nv50_create_texture_view(struct pipe_context *pipe,
switch (target) {
case PIPE_TEXTURE_1D:
- tic[2] |= NV50_TIC_2_TARGET_1D;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D;
break;
case PIPE_TEXTURE_2D:
- tic[2] |= NV50_TIC_2_TARGET_2D;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D;
break;
case PIPE_TEXTURE_RECT:
- tic[2] |= NV50_TIC_2_TARGET_RECT;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D_NO_MIPMAP;
break;
case PIPE_TEXTURE_3D:
- tic[2] |= NV50_TIC_2_TARGET_3D;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_THREE_D;
break;
case PIPE_TEXTURE_CUBE:
depth /= 6;
- tic[2] |= NV50_TIC_2_TARGET_CUBE;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBEMAP;
break;
case PIPE_TEXTURE_1D_ARRAY:
- tic[2] |= NV50_TIC_2_TARGET_1D_ARRAY;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D_ARRAY;
break;
case PIPE_TEXTURE_2D_ARRAY:
- tic[2] |= NV50_TIC_2_TARGET_2D_ARRAY;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D_ARRAY;
break;
case PIPE_TEXTURE_CUBE_ARRAY:
depth /= 6;
- tic[2] |= NV50_TIC_2_TARGET_CUBE_ARRAY;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBE_ARRAY;
break;
case PIPE_BUFFER:
assert(0); /* should be linear and handled above ! */
- tic[2] |= NV50_TIC_2_TARGET_BUFFER | NV50_TIC_2_LINEAR;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D_BUFFER | G80_TIC_2_LAYOUT_PITCH;
break;
default:
unreachable("unexpected/invalid texture target");
@@ -202,9 +199,9 @@ nv50_create_texture_view(struct pipe_context *pipe,
tic[5] = (mt->base.base.height0 << mt->ms_y) & 0xffff;
tic[5] |= depth << 16;
if (class_3d > NV50_3D_CLASS)
- tic[5] |= mt->base.base.last_level << NV50_TIC_5_LAST_LEVEL__SHIFT;
+ tic[5] |= mt->base.base.last_level << G80_TIC_5_MAP_MIP_LEVEL__SHIFT;
else
- tic[5] |= view->pipe.u.tex.last_level << NV50_TIC_5_LAST_LEVEL__SHIFT;
+ tic[5] |= view->pipe.u.tex.last_level << G80_TIC_5_MAP_MIP_LEVEL__SHIFT;
tic[6] = (mt->ms_x > 1) ? 0x88000000 : 0x03000000; /* sampling points */
@@ -213,9 +210,9 @@ nv50_create_texture_view(struct pipe_context *pipe,
else
tic[7] = 0;
- if (unlikely(!(tic[2] & NV50_TIC_2_NORMALIZED_COORDS)))
+ if (unlikely(!(tic[2] & G80_TIC_2_NORMALIZED_COORDS)))
if (mt->base.base.last_level)
- tic[5] &= ~NV50_TIC_5_LAST_LEVEL__MASK;
+ tic[5] &= ~G80_TIC_5_MAP_MIP_LEVEL__MASK;
return &view->pipe;
}
@@ -265,7 +262,7 @@ nv50_validate_tic(struct nv50_context *nv50, int s)
tic->id = nv50_screen_tic_alloc(nv50->screen, tic);
BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2);
- PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
+ PUSH_DATA (push, G80_SURFACE_FORMAT_R8_UNORM);
PUSH_DATA (push, 1);
BEGIN_NV04(push, NV50_2D(DST_PITCH), 5);
PUSH_DATA (push, 262144);
@@ -275,7 +272,7 @@ nv50_validate_tic(struct nv50_context *nv50, int s)
PUSH_DATA (push, txc->offset);
BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2);
PUSH_DATA (push, 0);
- PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
+ PUSH_DATA (push, G80_SURFACE_FORMAT_R8_UNORM);
BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10);
PUSH_DATA (push, 32);
PUSH_DATA (push, 1);
@@ -364,6 +361,7 @@ nv50_validate_tsc(struct nv50_context *nv50, int s)
PUSH_DATA (push, (i << 4) | 0);
continue;
}
+ nv50->seamless_cube_map = tsc->seamless_cube_map;
if (tsc->id < 0) {
tsc->id = nv50_screen_tsc_alloc(nv50->screen, tsc);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_texture.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_texture.xml.h
deleted file mode 100644
index a2b9921f647..00000000000
--- a/src/gallium/drivers/nouveau/nv50/nv50_texture.xml.h
+++ /dev/null
@@ -1,306 +0,0 @@
-#ifndef NV50_TEXTURE_XML
-#define NV50_TEXTURE_XML
-
-/* Autogenerated file, DO NOT EDIT manually!
-
-This file was generated by the rules-ng-ng headergen tool in this git repository:
-http://github.com/envytools/envytools/
-git clone https://github.com/envytools/envytools.git
-
-The rules-ng-ng source files this header was generated from are:
-- rnndb/graph/g80_texture.xml ( 8881 bytes, from 2014-09-25 06:32:11)
-- rnndb/copyright.xml ( 6452 bytes, from 2013-05-14 03:57:49)
-- rnndb/nvchipsets.xml ( 2759 bytes, from 2014-10-05 01:51:02)
-- rnndb/g80_defs.xml ( 18175 bytes, from 2014-09-25 06:32:11)
-
-Copyright (C) 2006-2014 by the following authors:
-- Artur Huillet <[email protected]> (ahuillet)
-- Ben Skeggs (darktama, darktama_)
-- B. R. <[email protected]> (koala_br)
-- Carlos Martin <[email protected]> (carlosmn)
-- Christoph Bumiller <[email protected]> (calim, chrisbmr)
-- Dawid Gajownik <[email protected]> (gajownik)
-- Dmitry Baryshkov
-- Dmitry Eremin-Solenikov <[email protected]> (lumag)
-- EdB <[email protected]> (edb_)
-- Erik Waling <[email protected]> (erikwaling)
-- Francisco Jerez <[email protected]> (curro)
-- imirkin <[email protected]> (imirkin)
-- jb17bsome <[email protected]> (jb17bsome)
-- Jeremy Kolb <[email protected]> (kjeremy)
-- Laurent Carlier <[email protected]> (lordheavy)
-- Luca Barbieri <[email protected]> (lb, lb1)
-- Maarten Maathuis <[email protected]> (stillunknown)
-- Marcin Kościelnicki <[email protected]> (mwk, koriakin)
-- Mark Carey <[email protected]> (careym)
-- Matthieu Castet <[email protected]> (mat-c)
-- nvidiaman <[email protected]> (nvidiaman)
-- Patrice Mandin <[email protected]> (pmandin, pmdata)
-- Pekka Paalanen <[email protected]> (pq, ppaalanen)
-- Peter Popov <[email protected]> (ironpeter)
-- Richard Hughes <[email protected]> (hughsient)
-- Rudi Cilibrasi <[email protected]> (cilibrar)
-- Serge Martin
-- Simon Raffeiner
-- Stephane Loeuillet <[email protected]> (leroutier)
-- Stephane Marchesin <[email protected]> (marcheu)
-- sturmflut <[email protected]> (sturmflut)
-- Sylvain Munaut <[email protected]>
-- Victor Stinner <[email protected]> (haypo)
-- Wladmir van der Laan <[email protected]> (miathan6)
-- Younes Manton <[email protected]> (ymanton)
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice (including the
-next paragraph) shall be included in all copies or substantial
-portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
-
-
-#define NV50_TIC_MAP_ZERO 0x00000000
-#define NV50_TIC_MAP_C0 0x00000002
-#define NV50_TIC_MAP_C1 0x00000003
-#define NV50_TIC_MAP_C2 0x00000004
-#define NV50_TIC_MAP_C3 0x00000005
-#define NV50_TIC_MAP_ONE_INT 0x00000006
-#define NV50_TIC_MAP_ONE_FLOAT 0x00000007
-#define NV50_TIC_TYPE_SNORM 0x00000001
-#define NV50_TIC_TYPE_UNORM 0x00000002
-#define NV50_TIC_TYPE_SINT 0x00000003
-#define NV50_TIC_TYPE_UINT 0x00000004
-#define NV50_TIC_TYPE_SSCALED 0x00000005
-#define NV50_TIC_TYPE_USCALED 0x00000006
-#define NV50_TIC_TYPE_FLOAT 0x00000007
-#define NV50_TSC_WRAP_REPEAT 0x00000000
-#define NV50_TSC_WRAP_MIRROR_REPEAT 0x00000001
-#define NV50_TSC_WRAP_CLAMP_TO_EDGE 0x00000002
-#define NV50_TSC_WRAP_CLAMP_TO_BORDER 0x00000003
-#define NV50_TSC_WRAP_CLAMP 0x00000004
-#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_EDGE 0x00000005
-#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_BORDER 0x00000006
-#define NV50_TSC_WRAP_MIRROR_CLAMP 0x00000007
-#define NV50_TIC__SIZE 0x00000020
-#define NV50_TIC_0 0x00000000
-#define NV50_TIC_0_MAPA__MASK 0x38000000
-#define NV50_TIC_0_MAPA__SHIFT 27
-#define NV50_TIC_0_MAPB__MASK 0x07000000
-#define NV50_TIC_0_MAPB__SHIFT 24
-#define NV50_TIC_0_MAPG__MASK 0x00e00000
-#define NV50_TIC_0_MAPG__SHIFT 21
-#define NV50_TIC_0_MAPR__MASK 0x001c0000
-#define NV50_TIC_0_MAPR__SHIFT 18
-#define NV50_TIC_0_TYPE3__MASK 0x00038000
-#define NV50_TIC_0_TYPE3__SHIFT 15
-#define NV50_TIC_0_TYPE2__MASK 0x00007000
-#define NV50_TIC_0_TYPE2__SHIFT 12
-#define NV50_TIC_0_TYPE1__MASK 0x00000e00
-#define NV50_TIC_0_TYPE1__SHIFT 9
-#define NV50_TIC_0_TYPE0__MASK 0x000001c0
-#define NV50_TIC_0_TYPE0__SHIFT 6
-#define NV50_TIC_0_FMT__MASK 0x0000003f
-#define NV50_TIC_0_FMT__SHIFT 0
-#define NV50_TIC_0_FMT_32_32_32_32 0x00000001
-#define NVC0_TIC_0_FMT_32_32_32 0x00000002
-#define NV50_TIC_0_FMT_16_16_16_16 0x00000003
-#define NV50_TIC_0_FMT_32_32 0x00000004
-#define NV50_TIC_0_FMT_32_8_X24 0x00000005
-#define NV50_TIC_0_FMT_8_8_8_8 0x00000008
-#define NV50_TIC_0_FMT_10_10_10_2 0x00000009
-#define NV50_TIC_0_FMT_16_16 0x0000000c
-#define NV50_TIC_0_FMT_24_8 0x0000000d
-#define NV50_TIC_0_FMT_8_24 0x0000000e
-#define NV50_TIC_0_FMT_32 0x0000000f
-#define NVC0_TIC_0_FMT_BPTC_FLOAT 0x00000010
-#define NVC0_TIC_0_FMT_BPTC_UFLOAT 0x00000011
-#define NV50_TIC_0_FMT_4_4_4_4 0x00000012
-#define NV50_TIC_0_FMT_1_5_5_5 0x00000013
-#define NV50_TIC_0_FMT_5_5_5_1 0x00000014
-#define NV50_TIC_0_FMT_5_6_5 0x00000015
-#define NV50_TIC_0_FMT_5_5_6 0x00000016
-#define NVC0_TIC_0_FMT_BPTC 0x00000017
-#define NV50_TIC_0_FMT_8_8 0x00000018
-#define NV50_TIC_0_FMT_16 0x0000001b
-#define NV50_TIC_0_FMT_8 0x0000001d
-#define NV50_TIC_0_FMT_4_4 0x0000001e
-#define NV50_TIC_0_FMT_BITMAP 0x0000001f
-#define NV50_TIC_0_FMT_9_9_9_E5 0x00000020
-#define NV50_TIC_0_FMT_11_11_10 0x00000021
-#define NV50_TIC_0_FMT_U8_YA8_V8_YB8 0x00000022
-#define NV50_TIC_0_FMT_YA8_U8_YB8_V8 0x00000023
-#define NV50_TIC_0_FMT_DXT1 0x00000024
-#define NV50_TIC_0_FMT_DXT3 0x00000025
-#define NV50_TIC_0_FMT_DXT5 0x00000026
-#define NV50_TIC_0_FMT_RGTC1 0x00000027
-#define NV50_TIC_0_FMT_RGTC2 0x00000028
-#define NV50_TIC_0_FMT_S8_Z24 0x00000029
-#define NV50_TIC_0_FMT_Z24_X8 0x0000002a
-#define NV50_TIC_0_FMT_Z24_S8 0x0000002b
-#define NV50_TIC_0_FMT_Z24_C8_MS4_CS4 0x0000002c
-#define NV50_TIC_0_FMT_Z24_C8_MS8_CS8 0x0000002d
-#define NV50_TIC_0_FMT_Z24_C8_MS4_CS12 0x0000002e
-#define NV50_TIC_0_FMT_Z32 0x0000002f
-#define NV50_TIC_0_FMT_Z32_S8_X24 0x00000030
-#define NV50_TIC_0_FMT_Z24_X8_S8_C8_X16_MS4_CS4 0x00000031
-#define NV50_TIC_0_FMT_Z24_X8_S8_C8_X16_MS8_CS8 0x00000032
-#define NV50_TIC_0_FMT_Z32_X8_C8_X16_MS4_CS4 0x00000033
-#define NV50_TIC_0_FMT_Z32_X8_C8_X16_MS8_CS8 0x00000034
-#define NV50_TIC_0_FMT_Z32_S8_C8_X16_MS4_CS4 0x00000035
-#define NV50_TIC_0_FMT_Z32_S8_C8_X16_MS8_CS8 0x00000036
-#define NV50_TIC_0_FMT_Z24_X8_S8_C8_X16_MS4_CS12 0x00000037
-#define NV50_TIC_0_FMT_Z32_X8_C8_X16_MS4_CS12 0x00000038
-#define NV50_TIC_0_FMT_Z32_S8_C8_X16_MS4_CS12 0x00000039
-#define NV50_TIC_0_FMT_Z16 0x0000003a
-
-#define NV50_TIC_1 0x00000004
-#define NV50_TIC_1_OFFSET_LOW__MASK 0xffffffff
-#define NV50_TIC_1_OFFSET_LOW__SHIFT 0
-
-#define NV50_TIC_2 0x00000008
-#define NV50_TIC_2_OFFSET_HIGH__MASK 0x000000ff
-#define NV50_TIC_2_OFFSET_HIGH__SHIFT 0
-#define NV50_TIC_2_COLORSPACE_SRGB 0x00000400
-#define NV50_TIC_2_TARGET__MASK 0x0003c000
-#define NV50_TIC_2_TARGET__SHIFT 14
-#define NV50_TIC_2_TARGET_1D 0x00000000
-#define NV50_TIC_2_TARGET_2D 0x00004000
-#define NV50_TIC_2_TARGET_3D 0x00008000
-#define NV50_TIC_2_TARGET_CUBE 0x0000c000
-#define NV50_TIC_2_TARGET_1D_ARRAY 0x00010000
-#define NV50_TIC_2_TARGET_2D_ARRAY 0x00014000
-#define NV50_TIC_2_TARGET_BUFFER 0x00018000
-#define NV50_TIC_2_TARGET_RECT 0x0001c000
-#define NV50_TIC_2_TARGET_CUBE_ARRAY 0x00020000
-#define NV50_TIC_2_LINEAR 0x00040000
-#define NV50_TIC_2_TILE_MODE_X__MASK 0x00380000
-#define NV50_TIC_2_TILE_MODE_X__SHIFT 19
-#define NV50_TIC_2_TILE_MODE_Y__MASK 0x01c00000
-#define NV50_TIC_2_TILE_MODE_Y__SHIFT 22
-#define NV50_TIC_2_TILE_MODE_Z__MASK 0x0e000000
-#define NV50_TIC_2_TILE_MODE_Z__SHIFT 25
-#define NV50_TIC_2_2D_UNK0258__MASK 0x30000000
-#define NV50_TIC_2_2D_UNK0258__SHIFT 28
-#define NV50_TIC_2_NO_BORDER 0x40000000
-#define NV50_TIC_2_NORMALIZED_COORDS 0x80000000
-
-#define NV50_TIC_3 0x0000000c
-#define NV50_TIC_3_PITCH__MASK 0xffffffff
-#define NV50_TIC_3_PITCH__SHIFT 0
-
-#define NV50_TIC_4 0x00000010
-#define NV50_TIC_4_WIDTH__MASK 0xffffffff
-#define NV50_TIC_4_WIDTH__SHIFT 0
-
-#define NV50_TIC_5 0x00000014
-#define NV50_TIC_5_LAST_LEVEL__MASK 0xf0000000
-#define NV50_TIC_5_LAST_LEVEL__SHIFT 28
-#define NV50_TIC_5_DEPTH__MASK 0x0fff0000
-#define NV50_TIC_5_DEPTH__SHIFT 16
-#define NV50_TIC_5_HEIGHT__MASK 0x0000ffff
-#define NV50_TIC_5_HEIGHT__SHIFT 0
-
-#define NV50_TIC_7 0x0000001c
-#define NV50_TIC_7_BASE_LEVEL__MASK 0x0000000f
-#define NV50_TIC_7_BASE_LEVEL__SHIFT 0
-#define NV50_TIC_7_MAX_LEVEL__MASK 0x000000f0
-#define NV50_TIC_7_MAX_LEVEL__SHIFT 4
-#define NV50_TIC_7_MS_MODE__MASK 0x0000f000
-#define NV50_TIC_7_MS_MODE__SHIFT 12
-#define NV50_TIC_7_MS_MODE_MS1 0x00000000
-#define NV50_TIC_7_MS_MODE_MS2 0x00001000
-#define NV50_TIC_7_MS_MODE_MS4 0x00002000
-#define NV50_TIC_7_MS_MODE_MS8 0x00003000
-#define NVA3_TIC_7_MS_MODE_MS8_ALT 0x00004000
-#define NVA3_TIC_7_MS_MODE_MS2_ALT 0x00005000
-#define NVC0_TIC_7_MS_MODE_UNK6 0x00006000
-#define NV50_TIC_7_MS_MODE_MS4_CS4 0x00008000
-#define NV50_TIC_7_MS_MODE_MS4_CS12 0x00009000
-#define NV50_TIC_7_MS_MODE_MS8_CS8 0x0000a000
-#define NVC0_TIC_7_MS_MODE_MS8_CS24 0x0000b000
-
-#define NV50_TSC__SIZE 0x00000020
-#define NV50_TSC_0 0x00000000
-#define NV50_TSC_0_WRAPS__MASK 0x00000007
-#define NV50_TSC_0_WRAPS__SHIFT 0
-#define NV50_TSC_0_WRAPT__MASK 0x00000038
-#define NV50_TSC_0_WRAPT__SHIFT 3
-#define NV50_TSC_0_WRAPR__MASK 0x000001c0
-#define NV50_TSC_0_WRAPR__SHIFT 6
-#define NV50_TSC_0_SHADOW_COMPARE_ENABLE 0x00000200
-#define NV50_TSC_0_SHADOW_COMPARE_FUNC__MASK 0x00001c00
-#define NV50_TSC_0_SHADOW_COMPARE_FUNC__SHIFT 10
-#define NV50_TSC_0_SRGB_CONVERSION_ALLOWED 0x00002000
-#define NV50_TSC_0_BOX_S__MASK 0x0001c000
-#define NV50_TSC_0_BOX_S__SHIFT 14
-#define NV50_TSC_0_BOX_T__MASK 0x000e0000
-#define NV50_TSC_0_BOX_T__SHIFT 17
-#define NV50_TSC_0_ANISOTROPY_MASK__MASK 0x00700000
-#define NV50_TSC_0_ANISOTROPY_MASK__SHIFT 20
-
-#define NV50_TSC_1 0x00000004
-#define NV50_TSC_1_UNKN_ANISO_15 0x10000000
-#define NV50_TSC_1_UNKN_ANISO_35 0x18000000
-#define NV50_TSC_1_MAGF__MASK 0x00000003
-#define NV50_TSC_1_MAGF__SHIFT 0
-#define NV50_TSC_1_MAGF_NEAREST 0x00000001
-#define NV50_TSC_1_MAGF_LINEAR 0x00000002
-#define NV50_TSC_1_MINF__MASK 0x00000030
-#define NV50_TSC_1_MINF__SHIFT 4
-#define NV50_TSC_1_MINF_NEAREST 0x00000010
-#define NV50_TSC_1_MINF_LINEAR 0x00000020
-#define NV50_TSC_1_MIPF__MASK 0x000000c0
-#define NV50_TSC_1_MIPF__SHIFT 6
-#define NV50_TSC_1_MIPF_NONE 0x00000040
-#define NV50_TSC_1_MIPF_NEAREST 0x00000080
-#define NV50_TSC_1_MIPF_LINEAR 0x000000c0
-#define NVE4_TSC_1_CUBE_SEAMLESS 0x00000200
-#define NV50_TSC_1_LOD_BIAS__MASK 0x01fff000
-#define NV50_TSC_1_LOD_BIAS__SHIFT 12
-#define NVE4_TSC_1_FORCE_NONNORMALIZED_COORDS 0x02000000
-
-#define NV50_TSC_2 0x00000008
-#define NV50_TSC_2_MIN_LOD__MASK 0x00000fff
-#define NV50_TSC_2_MIN_LOD__SHIFT 0
-#define NV50_TSC_2_MAX_LOD__MASK 0x00fff000
-#define NV50_TSC_2_MAX_LOD__SHIFT 12
-#define NV50_TSC_2_BORDER_COLOR_SRGB_RED__MASK 0xff000000
-#define NV50_TSC_2_BORDER_COLOR_SRGB_RED__SHIFT 24
-
-#define NV50_TSC_3 0x0000000c
-#define NV50_TSC_3_BORDER_COLOR_SRGB_GREEN__MASK 0x000ff000
-#define NV50_TSC_3_BORDER_COLOR_SRGB_GREEN__SHIFT 12
-#define NV50_TSC_3_BORDER_COLOR_SRGB_BLUE__MASK 0x0ff00000
-#define NV50_TSC_3_BORDER_COLOR_SRGB_BLUE__SHIFT 20
-
-#define NV50_TSC_4 0x00000010
-#define NV50_TSC_4_BORDER_COLOR_RED__MASK 0xffffffff
-#define NV50_TSC_4_BORDER_COLOR_RED__SHIFT 0
-
-#define NV50_TSC_5 0x00000014
-#define NV50_TSC_5_BORDER_COLOR_GREEN__MASK 0xffffffff
-#define NV50_TSC_5_BORDER_COLOR_GREEN__SHIFT 0
-
-#define NV50_TSC_6 0x00000018
-#define NV50_TSC_6_BORDER_COLOR_BLUE__MASK 0xffffffff
-#define NV50_TSC_6_BORDER_COLOR_BLUE__SHIFT 0
-
-#define NV50_TSC_7 0x0000001c
-#define NV50_TSC_7_BORDER_COLOR_ALPHA__MASK 0xffffffff
-#define NV50_TSC_7_BORDER_COLOR_ALPHA__SHIFT 0
-
-
-#endif /* NV50_TEXTURE_XML */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
index 9a3fd1e705f..86a8c159469 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
@@ -3,7 +3,7 @@
#include "nv50/nv50_context.h"
-#include "nv50/nv50_defs.xml.h"
+#include "nv50/g80_defs.xml.h"
struct nv50_transfer {
struct pipe_transfer base;
@@ -163,7 +163,7 @@ nv50_sifc_linear_u8(struct nouveau_context *nv,
offset &= ~0xff;
BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2);
- PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
+ PUSH_DATA (push, G80_SURFACE_FORMAT_R8_UNORM);
PUSH_DATA (push, 1);
BEGIN_NV04(push, NV50_2D(DST_PITCH), 5);
PUSH_DATA (push, 262144);
@@ -173,7 +173,7 @@ nv50_sifc_linear_u8(struct nouveau_context *nv,
PUSH_DATA (push, dst->offset + offset);
BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2);
PUSH_DATA (push, 0);
- PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
+ PUSH_DATA (push, G80_SURFACE_FORMAT_R8_UNORM);
BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10);
PUSH_DATA (push, size);
PUSH_DATA (push, 1);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
index 5369d5207ee..6f60445d8d2 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -76,7 +76,7 @@ nv50_vertex_state_create(struct pipe_context *pipe,
enum pipe_format fmt = ve->src_format;
so->element[i].pipe = elements[i];
- so->element[i].state = nv50_format_table[fmt].vtx;
+ so->element[i].state = nv50_vertex_format[fmt].vtx;
if (!so->element[i].state) {
switch (util_format_get_nr_components(fmt)) {
@@ -89,7 +89,7 @@ nv50_vertex_state_create(struct pipe_context *pipe,
FREE(so);
return NULL;
}
- so->element[i].state = nv50_format_table[fmt].vtx;
+ so->element[i].state = nv50_vertex_format[fmt].vtx;
so->need_conversion = true;
pipe_debug_message(&nouveau_context(pipe)->debug, FALLBACK,
"Converting vertex element %d, no hw format %s",
@@ -816,6 +816,13 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
PUSH_DATA (push, 0x20);
}
+ if (nv50->screen->base.class_3d >= NVA0_3D_CLASS &&
+ nv50->seamless_cube_map != nv50->state.seamless_cube_map) {
+ nv50->state.seamless_cube_map = nv50->seamless_cube_map;
+ BEGIN_NV04(push, SUBC_3D(NVA0_3D_TEX_MISC), 1);
+ PUSH_DATA (push, nv50->seamless_cube_map ? NVA0_3D_TEX_MISC_SEAMLESS_CUBE_MAP : 0);
+ }
+
if (nv50->vbo_fifo) {
nv50_push_vbo(nv50, info);
push->kick_notify = nv50_default_kick_notify;
diff --git a/src/gallium/drivers/nouveau/nv_object.xml.h b/src/gallium/drivers/nouveau/nv_object.xml.h
index 0a0e187dc02..3479c343261 100644
--- a/src/gallium/drivers/nouveau/nv_object.xml.h
+++ b/src/gallium/drivers/nouveau/nv_object.xml.h
@@ -192,6 +192,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVF0_3D_CLASS 0x0000a197
#define NVEA_3D_CLASS 0x0000a297
#define GM107_3D_CLASS 0x0000b097
+#define GM200_3D_CLASS 0x0000b197
#define NV50_2D_CLASS 0x0000502d
#define NVC0_2D_CLASS 0x0000902d
#define NV50_COMPUTE_CLASS 0x000050c0
@@ -200,6 +201,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC8_COMPUTE_CLASS 0x000092c0
#define NVE4_COMPUTE_CLASS 0x0000a0c0
#define NVF0_COMPUTE_CLASS 0x0000a1c0
+#define GM107_COMPUTE_CLASS 0x0000b0c0
#define NV84_CRYPT_CLASS 0x000074c1
#define BLOB_NVC0_PCOPY1_CLASS 0x000090b8
#define BLOB_NVC0_PCOPY0_CLASS 0x000090b5
diff --git a/src/gallium/drivers/nouveau/nvc0/gm107_texture.xml.h b/src/gallium/drivers/nouveau/nvc0/gm107_texture.xml.h
new file mode 100644
index 00000000000..a4bc3805f26
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/gm107_texture.xml.h
@@ -0,0 +1,365 @@
+#ifndef GM107_TEXTURE_XML
+#define GM107_TEXTURE_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://github.com/envytools/envytools/
+git clone https://github.com/envytools/envytools.git
+
+The rules-ng-ng source files this header was generated from are:
+- /home/skeggsb/git/envytools/rnndb/../rnndb/graph/gm107_texture.xml ( 22057 bytes, from 2016-02-12 03:01:43)
+- /home/skeggsb/git/envytools/rnndb/copyright.xml ( 6456 bytes, from 2015-09-10 02:57:40)
+- /home/skeggsb/git/envytools/rnndb/nvchipsets.xml ( 2908 bytes, from 2016-02-04 22:19:11)
+- /home/skeggsb/git/envytools/rnndb/g80_defs.xml ( 21739 bytes, from 2016-02-04 00:29:42)
+
+Copyright (C) 2006-2016 by the following authors:
+- Artur Huillet <[email protected]> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <[email protected]> (koala_br)
+- Carlos Martin <[email protected]> (carlosmn)
+- Christoph Bumiller <[email protected]> (calim, chrisbmr)
+- Dawid Gajownik <[email protected]> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <[email protected]> (lumag)
+- EdB <[email protected]> (edb_)
+- Erik Waling <[email protected]> (erikwaling)
+- Francisco Jerez <[email protected]> (curro)
+- Ilia Mirkin <[email protected]> (imirkin)
+- jb17bsome <[email protected]> (jb17bsome)
+- Jeremy Kolb <[email protected]> (kjeremy)
+- Laurent Carlier <[email protected]> (lordheavy)
+- Luca Barbieri <[email protected]> (lb, lb1)
+- Maarten Maathuis <[email protected]> (stillunknown)
+- Marcin Kościelnicki <[email protected]> (mwk, koriakin)
+- Mark Carey <[email protected]> (careym)
+- Matthieu Castet <[email protected]> (mat-c)
+- nvidiaman <[email protected]> (nvidiaman)
+- Patrice Mandin <[email protected]> (pmandin, pmdata)
+- Pekka Paalanen <[email protected]> (pq, ppaalanen)
+- Peter Popov <[email protected]> (ironpeter)
+- Richard Hughes <[email protected]> (hughsient)
+- Rudi Cilibrasi <[email protected]> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <[email protected]> (leroutier)
+- Stephane Marchesin <[email protected]> (marcheu)
+- sturmflut <[email protected]> (sturmflut)
+- Sylvain Munaut <[email protected]>
+- Victor Stinner <[email protected]> (haypo)
+- Wladmir van der Laan <[email protected]> (miathan6)
+- Younes Manton <[email protected]> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+#define GM107_TIC2__SIZE 0x00000020
+#define GM107_TIC2_0 0x00000000
+#define GM107_TIC2_0_COMPONENTS_SIZES__MASK 0x0000007f
+#define GM107_TIC2_0_COMPONENTS_SIZES__SHIFT 0
+#define GM107_TIC2_0_COMPONENTS_SIZES_R32_G32_B32_A32 0x00000001
+#define GM107_TIC2_0_COMPONENTS_SIZES_R32_G32_B32 0x00000002
+#define GM107_TIC2_0_COMPONENTS_SIZES_R16_G16_B16_A16 0x00000003
+#define GM107_TIC2_0_COMPONENTS_SIZES_R32_G32 0x00000004
+#define GM107_TIC2_0_COMPONENTS_SIZES_R32_B24G8 0x00000005
+#define GM107_TIC2_0_COMPONENTS_SIZES_X8B8G8R8 0x00000007
+#define GM107_TIC2_0_COMPONENTS_SIZES_A8B8G8R8 0x00000008
+#define GM107_TIC2_0_COMPONENTS_SIZES_A2B10G10R10 0x00000009
+#define GM107_TIC2_0_COMPONENTS_SIZES_R16_G16 0x0000000c
+#define GM107_TIC2_0_COMPONENTS_SIZES_G8R24 0x0000000d
+#define GM107_TIC2_0_COMPONENTS_SIZES_G24R8 0x0000000e
+#define GM107_TIC2_0_COMPONENTS_SIZES_R32 0x0000000f
+#define GM107_TIC2_0_COMPONENTS_SIZES_A4B4G4R4 0x00000012
+#define GM107_TIC2_0_COMPONENTS_SIZES_A5B5G5R1 0x00000013
+#define GM107_TIC2_0_COMPONENTS_SIZES_A1B5G5R5 0x00000014
+#define GM107_TIC2_0_COMPONENTS_SIZES_B5G6R5 0x00000015
+#define GM107_TIC2_0_COMPONENTS_SIZES_B6G5R5 0x00000016
+#define GM107_TIC2_0_COMPONENTS_SIZES_G8R8 0x00000018
+#define GM107_TIC2_0_COMPONENTS_SIZES_R16 0x0000001b
+#define GM107_TIC2_0_COMPONENTS_SIZES_Y8_VIDEO 0x0000001c
+#define GM107_TIC2_0_COMPONENTS_SIZES_R8 0x0000001d
+#define GM107_TIC2_0_COMPONENTS_SIZES_G4R4 0x0000001e
+#define GM107_TIC2_0_COMPONENTS_SIZES_R1 0x0000001f
+#define GM107_TIC2_0_COMPONENTS_SIZES_E5B9G9R9_SHAREDEXP 0x00000020
+#define GM107_TIC2_0_COMPONENTS_SIZES_BF10GF11RF11 0x00000021
+#define GM107_TIC2_0_COMPONENTS_SIZES_G8B8G8R8 0x00000022
+#define GM107_TIC2_0_COMPONENTS_SIZES_B8G8R8G8 0x00000023
+#define GM107_TIC2_0_COMPONENTS_SIZES_DXT1 0x00000024
+#define GM107_TIC2_0_COMPONENTS_SIZES_DXT23 0x00000025
+#define GM107_TIC2_0_COMPONENTS_SIZES_DXT45 0x00000026
+#define GM107_TIC2_0_COMPONENTS_SIZES_DXN1 0x00000027
+#define GM107_TIC2_0_COMPONENTS_SIZES_DXN2 0x00000028
+#define GM107_TIC2_0_COMPONENTS_SIZES_BC6H_SF16 0x00000010
+#define GM107_TIC2_0_COMPONENTS_SIZES_BC6H_UF16 0x00000011
+#define GM107_TIC2_0_COMPONENTS_SIZES_BC7U 0x00000017
+#define GM107_TIC2_0_COMPONENTS_SIZES_ETC2_RGB 0x00000006
+#define GM107_TIC2_0_COMPONENTS_SIZES_ETC2_RGB_PTA 0x0000000a
+#define GM107_TIC2_0_COMPONENTS_SIZES_ETC2_RGBA 0x0000000b
+#define GM107_TIC2_0_COMPONENTS_SIZES_EAC 0x00000019
+#define GM107_TIC2_0_COMPONENTS_SIZES_EACX2 0x0000001a
+#define GM107_TIC2_0_COMPONENTS_SIZES_Z24S8 0x00000029
+#define GM107_TIC2_0_COMPONENTS_SIZES_X8Z24 0x0000002a
+#define GM107_TIC2_0_COMPONENTS_SIZES_S8Z24 0x0000002b
+#define GM107_TIC2_0_COMPONENTS_SIZES_X4V4Z24__COV4R4V 0x0000002c
+#define GM107_TIC2_0_COMPONENTS_SIZES_X4V4Z24__COV8R8V 0x0000002d
+#define GM107_TIC2_0_COMPONENTS_SIZES_V8Z24__COV4R12V 0x0000002e
+#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32 0x0000002f
+#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X24S8 0x00000030
+#define GM107_TIC2_0_COMPONENTS_SIZES_X8Z24_X20V4S8__COV4R4V 0x00000031
+#define GM107_TIC2_0_COMPONENTS_SIZES_X8Z24_X20V4S8__COV8R8V 0x00000032
+#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X20V4X8__COV4R4V 0x00000033
+#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X20V4X8__COV8R8V 0x00000034
+#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X20V4S8__COV4R4V 0x00000035
+#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X20V4S8__COV8R8V 0x00000036
+#define GM107_TIC2_0_COMPONENTS_SIZES_X8Z24_X16V8S8__COV4R12V 0x00000037
+#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X16V8X8__COV4R12V 0x00000038
+#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X16V8S8__COV4R12V 0x00000039
+#define GM107_TIC2_0_COMPONENTS_SIZES_Z16 0x0000003a
+#define GM107_TIC2_0_COMPONENTS_SIZES_V8Z24__COV8R24V 0x0000003b
+#define GM107_TIC2_0_COMPONENTS_SIZES_X8Z24_X16V8S8__COV8R24V 0x0000003c
+#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X16V8X8__COV8R24V 0x0000003d
+#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X16V8S8__COV8R24V 0x0000003e
+#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_4X4 0x00000040
+#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_5X4 0x00000050
+#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_5X5 0x00000041
+#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_6X5 0x00000051
+#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_6X6 0x00000042
+#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_8X5 0x00000055
+#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_8X6 0x00000052
+#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_8X8 0x00000044
+#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_10X5 0x00000056
+#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_10X6 0x00000057
+#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_10X8 0x00000053
+#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_10X10 0x00000045
+#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_12X10 0x00000054
+#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_12X12 0x00000046
+#define GM107_TIC2_0_R_DATA_TYPE__MASK 0x00000380
+#define GM107_TIC2_0_R_DATA_TYPE__SHIFT 7
+#define GM107_TIC2_0_G_DATA_TYPE__MASK 0x00001c00
+#define GM107_TIC2_0_G_DATA_TYPE__SHIFT 10
+#define GM107_TIC2_0_B_DATA_TYPE__MASK 0x0000e000
+#define GM107_TIC2_0_B_DATA_TYPE__SHIFT 13
+#define GM107_TIC2_0_A_DATA_TYPE__MASK 0x00070000
+#define GM107_TIC2_0_A_DATA_TYPE__SHIFT 16
+#define GM107_TIC2_0_X_SOURCE__MASK 0x00380000
+#define GM107_TIC2_0_X_SOURCE__SHIFT 19
+#define GM107_TIC2_0_Y_SOURCE__MASK 0x01c00000
+#define GM107_TIC2_0_Y_SOURCE__SHIFT 22
+#define GM107_TIC2_0_Z_SOURCE__MASK 0x0e000000
+#define GM107_TIC2_0_Z_SOURCE__SHIFT 25
+#define GM107_TIC2_0_W_SOURCE__MASK 0x70000000
+#define GM107_TIC2_0_W_SOURCE__SHIFT 28
+#define GM107_TIC2_0_PACK_COMPONENTS 0x80000000
+
+#define GM107_TIC2_1 0x00000004
+#define GM107_TIC2_1_ADDRESS_BITS_31_TO_0__MASK 0xffffffff
+#define GM107_TIC2_1_ADDRESS_BITS_31_TO_0__SHIFT 0
+#define GM107_TIC2_1_ADDRESS_BITS_31_TO_5__MASK 0xffffffe0
+#define GM107_TIC2_1_ADDRESS_BITS_31_TO_5__SHIFT 5
+#define GM107_TIC2_1_ADDRESS_BITS_31_TO_5__SHR 5
+#define GM107_TIC2_1_GOB_DEPTH_OFFSET__MASK 0x00000060
+#define GM107_TIC2_1_GOB_DEPTH_OFFSET__SHIFT 5
+#define GM107_TIC2_1_ADDRESS_BITS_31_TO_9__MASK 0xfffffe00
+#define GM107_TIC2_1_ADDRESS_BITS_31_TO_9__SHIFT 9
+#define GM107_TIC2_1_ADDRESS_BITS_31_TO_9__SHR 9
+
+#define GM107_TIC2_2 0x00000008
+#define GM107_TIC2_2_ADDRESS_BITS_47_TO_32__MASK 0x0000ffff
+#define GM107_TIC2_2_ADDRESS_BITS_47_TO_32__SHIFT 0
+#define GM107_TIC2_2_HEADER_VERSION__MASK 0x00e00000
+#define GM107_TIC2_2_HEADER_VERSION__SHIFT 21
+#define GM107_TIC2_2_HEADER_VERSION_ONE_D_BUFFER 0x00000000
+#define GM107_TIC2_2_HEADER_VERSION_PITCH_COLORKEY 0x00200000
+#define GM107_TIC2_2_HEADER_VERSION_PITCH 0x00400000
+#define GM107_TIC2_2_HEADER_VERSION_BLOCKLINEAR 0x00600000
+#define GM107_TIC2_2_HEADER_VERSION_BLOCKLINEAR_COLORKEY 0x00800000
+#define GM107_TIC2_2_RESOURCE_VIEW_COHERENCY_HASH__MASK 0x1e000000
+#define GM107_TIC2_2_RESOURCE_VIEW_COHERENCY_HASH__SHIFT 25
+
+#define GM107_TIC2_3 0x0000000c
+#define GM107_TIC2_3_WIDTH_MINUS_ONE_BITS_31_TO_16__MASK 0x0000ffff
+#define GM107_TIC2_3_WIDTH_MINUS_ONE_BITS_31_TO_16__SHIFT 0
+#define GM107_TIC2_3_PITCH_BITS_20_TO_5__MASK 0x0000ffff
+#define GM107_TIC2_3_PITCH_BITS_20_TO_5__SHIFT 0
+#define GM107_TIC2_3_PITCH_BITS_20_TO_5__SHR 5
+#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH__MASK 0x00000007
+#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH__SHIFT 0
+#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH__MIN 0x00000000
+#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH__MAX 0x00000000
+#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH_ONE 0x00000000
+#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH_TWO 0x00000001
+#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH_FOUR 0x00000002
+#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH_EIGHT 0x00000003
+#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH_SIXTEEN 0x00000004
+#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH_THIRTYTWO 0x00000005
+#define GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT__MASK 0x00000038
+#define GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT__SHIFT 3
+#define GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT_ONE 0x00000000
+#define GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT_TWO 0x00000008
+#define GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT_FOUR 0x00000010
+#define GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT_EIGHT 0x00000018
+#define GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT_SIXTEEN 0x00000020
+#define GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT_THIRTYTWO 0x00000028
+#define GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH__MASK 0x000001c0
+#define GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH__SHIFT 6
+#define GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH_ONE 0x00000000
+#define GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH_TWO 0x00000040
+#define GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH_FOUR 0x00000080
+#define GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH_EIGHT 0x000000c0
+#define GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH_SIXTEEN 0x00000100
+#define GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH_THIRTYTWO 0x00000140
+#define GM107_TIC2_3_TILE_WIDTH_IN_GOBS__MASK 0x00001c00
+#define GM107_TIC2_3_TILE_WIDTH_IN_GOBS__SHIFT 10
+#define GM107_TIC2_3_TILE_WIDTH_IN_GOBS_ONE 0x00000000
+#define GM107_TIC2_3_TILE_WIDTH_IN_GOBS_TWO 0x00000400
+#define GM107_TIC2_3_TILE_WIDTH_IN_GOBS_FOUR 0x00000800
+#define GM107_TIC2_3_TILE_WIDTH_IN_GOBS_EIGHT 0x00000c00
+#define GM107_TIC2_3_TILE_WIDTH_IN_GOBS_SIXTEEN 0x00001000
+#define GM107_TIC2_3_TILE_WIDTH_IN_GOBS_THIRTYTWO 0x00001400
+#define GM107_TIC2_3_GOB_3D 0x00002000
+#define GM107_TIC2_3_LOD_ANISO_QUALITY_2 0x00010000
+#define GM107_TIC2_3_LOD_ANISO_QUALITY__MASK 0x00020000
+#define GM107_TIC2_3_LOD_ANISO_QUALITY__SHIFT 17
+#define GM107_TIC2_3_LOD_ANISO_QUALITY_LOW 0x00000000
+#define GM107_TIC2_3_LOD_ANISO_QUALITY_HIGH 0x00020000
+#define GM107_TIC2_3_LOD_ISO_QUALITY__MASK 0x00040000
+#define GM107_TIC2_3_LOD_ISO_QUALITY__SHIFT 18
+#define GM107_TIC2_3_LOD_ISO_QUALITY_LOW 0x00000000
+#define GM107_TIC2_3_LOD_ISO_QUALITY_HIGH 0x00040000
+#define GM107_TIC2_3_ANISO_COARSE_SPREAD_MODIFIER__MASK 0x00180000
+#define GM107_TIC2_3_ANISO_COARSE_SPREAD_MODIFIER__SHIFT 19
+#define GM107_TIC2_3_ANISO_COARSE_SPREAD_MODIFIER_NONE 0x00000000
+#define GM107_TIC2_3_ANISO_COARSE_SPREAD_MODIFIER_CONST_ONE 0x00080000
+#define GM107_TIC2_3_ANISO_COARSE_SPREAD_MODIFIER_CONST_TWO 0x00100000
+#define GM107_TIC2_3_ANISO_COARSE_SPREAD_MODIFIER_SQRT 0x00180000
+#define GM107_TIC2_3_ANISO_SPREAD_SCALE__MASK 0x03e00000
+#define GM107_TIC2_3_ANISO_SPREAD_SCALE__SHIFT 21
+#define GM107_TIC2_3_USE_HEADER_OPT_CONTROL 0x04000000
+#define GM107_TIC2_3_DEPTH_TEXTURE 0x08000000
+#define GM107_TIC2_3_MAX_MIP_LEVEL__MASK 0xf0000000
+#define GM107_TIC2_3_MAX_MIP_LEVEL__SHIFT 28
+
+#define GM107_TIC2_4 0x00000010
+#define GM107_TIC2_4_WIDTH_MINUS_ONE_BITS_15_TO_0__MASK 0x0000ffff
+#define GM107_TIC2_4_WIDTH_MINUS_ONE_BITS_15_TO_0__SHIFT 0
+#define GM107_TIC2_4_WIDTH_MINUS_ONE__MASK 0x0000ffff
+#define GM107_TIC2_4_WIDTH_MINUS_ONE__SHIFT 0
+#define GM107_TIC2_4_ANISO_SPREAD_MAX_LOG2__MASK 0x00380000
+#define GM107_TIC2_4_ANISO_SPREAD_MAX_LOG2__SHIFT 19
+#define GM107_TIC2_4_SRGB_CONVERSION 0x00400000
+#define GM107_TIC2_4_TEXTURE_TYPE__MASK 0x07800000
+#define GM107_TIC2_4_TEXTURE_TYPE__SHIFT 23
+#define GM107_TIC2_4_TEXTURE_TYPE_ONE_D 0x00000000
+#define GM107_TIC2_4_TEXTURE_TYPE_TWO_D 0x00800000
+#define GM107_TIC2_4_TEXTURE_TYPE_THREE_D 0x01000000
+#define GM107_TIC2_4_TEXTURE_TYPE_CUBEMAP 0x01800000
+#define GM107_TIC2_4_TEXTURE_TYPE_ONE_D_ARRAY 0x02000000
+#define GM107_TIC2_4_TEXTURE_TYPE_TWO_D_ARRAY 0x02800000
+#define GM107_TIC2_4_TEXTURE_TYPE_ONE_D_BUFFER 0x03000000
+#define GM107_TIC2_4_TEXTURE_TYPE_TWO_D_NO_MIPMAP 0x03800000
+#define GM107_TIC2_4_TEXTURE_TYPE_CUBE_ARRAY 0x04000000
+#define GM107_TIC2_4_SECTOR_PROMOTION__MASK 0x18000000
+#define GM107_TIC2_4_SECTOR_PROMOTION__SHIFT 27
+#define GM107_TIC2_4_SECTOR_PROMOTION_NO_PROMOTION 0x00000000
+#define GM107_TIC2_4_SECTOR_PROMOTION_PROMOTE_TO_2_V 0x08000000
+#define GM107_TIC2_4_SECTOR_PROMOTION_PROMOTE_TO_2_H 0x10000000
+#define GM107_TIC2_4_SECTOR_PROMOTION_PROMOTE_TO_4 0x18000000
+#define GM107_TIC2_4_BORDER_SIZE__MASK 0xe0000000
+#define GM107_TIC2_4_BORDER_SIZE__SHIFT 29
+#define GM107_TIC2_4_BORDER_SIZE_ONE 0x00000000
+#define GM107_TIC2_4_BORDER_SIZE_TWO 0x20000000
+#define GM107_TIC2_4_BORDER_SIZE_FOUR 0x40000000
+#define GM107_TIC2_4_BORDER_SIZE_EIGHT 0x60000000
+#define GM107_TIC2_4_BORDER_SIZE_SAMPLER_COLOR 0xe0000000
+
+#define GM107_TIC2_5 0x00000014
+#define GM107_TIC2_5_HEIGHT_MINUS_ONE__MASK 0x0000ffff
+#define GM107_TIC2_5_HEIGHT_MINUS_ONE__SHIFT 0
+#define GM107_TIC2_5_DEPTH_MINUS_ONE__MASK 0x3fff0000
+#define GM107_TIC2_5_DEPTH_MINUS_ONE__SHIFT 16
+#define GM107_TIC2_5_NORMALIZED_COORDS 0x80000000
+
+#define GM107_TIC2_6 0x00000018
+#define GM107_TIC2_6_COLOR_KEY_OP 0x00000001
+#define GM107_TIC2_6_TRILIN_OPT__MASK 0x0000003e
+#define GM107_TIC2_6_TRILIN_OPT__SHIFT 1
+#define GM107_TIC2_6_MIP_LOD_BIAS__MASK 0x0007ffc0
+#define GM107_TIC2_6_MIP_LOD_BIAS__SHIFT 6
+#define GM107_TIC2_6_MIP_LOD_BIAS__RADIX 0x00000008
+#define GM107_TIC2_6_ANISO_BIAS__MASK 0x00780000
+#define GM107_TIC2_6_ANISO_BIAS__SHIFT 19
+#define GM107_TIC2_6_ANISO_BIAS__RADIX 0x00000004
+#define GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC__MASK 0x01800000
+#define GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC__SHIFT 23
+#define GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC_HALF 0x00000000
+#define GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC_ONE 0x00800000
+#define GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC_TWO 0x01000000
+#define GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC_MAX 0x01800000
+#define GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC__MASK 0x06000000
+#define GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC__SHIFT 25
+#define GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC_HALF 0x00000000
+#define GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC_ONE 0x02000000
+#define GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC_TWO 0x04000000
+#define GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC_MAX 0x06000000
+#define GM107_TIC2_6_MAX_ANISOTROPY__MASK 0x38000000
+#define GM107_TIC2_6_MAX_ANISOTROPY__SHIFT 27
+#define GM107_TIC2_6_MAX_ANISOTROPY_1_TO_1 0x00000000
+#define GM107_TIC2_6_MAX_ANISOTROPY_2_TO_1 0x08000000
+#define GM107_TIC2_6_MAX_ANISOTROPY_4_TO_1 0x10000000
+#define GM107_TIC2_6_MAX_ANISOTROPY_6_TO_1 0x18000000
+#define GM107_TIC2_6_MAX_ANISOTROPY_8_TO_1 0x20000000
+#define GM107_TIC2_6_MAX_ANISOTROPY_10_TO_1 0x28000000
+#define GM107_TIC2_6_MAX_ANISOTROPY_12_TO_1 0x30000000
+#define GM107_TIC2_6_MAX_ANISOTROPY_16_TO_1 0x38000000
+#define GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER__MASK 0xc0000000
+#define GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER__SHIFT 30
+#define GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER_NONE 0x00000000
+#define GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER_CONST_ONE 0x40000000
+#define GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER_CONST_TWO 0x80000000
+#define GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER_SQRT 0xc0000000
+
+#define GM107_TIC2_7 0x0000001c
+#define GM107_TIC2_7_COLOR_KEY_VALUE__MASK 0xffffffff
+#define GM107_TIC2_7_COLOR_KEY_VALUE__SHIFT 0
+#define GM107_TIC2_7_RES_VIEW_MIN_MIP_LEVEL__MASK 0x0000000f
+#define GM107_TIC2_7_RES_VIEW_MIN_MIP_LEVEL__SHIFT 0
+#define GM107_TIC2_7_RES_VIEW_MAX_MIP_LEVEL__MASK 0x000000f0
+#define GM107_TIC2_7_RES_VIEW_MAX_MIP_LEVEL__SHIFT 4
+#define GM107_TIC2_7_MULTI_SAMPLE_COUNT__MASK 0x00000f00
+#define GM107_TIC2_7_MULTI_SAMPLE_COUNT__SHIFT 8
+#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_1X1 0x00000000
+#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_2X1 0x00000100
+#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_2X2 0x00000200
+#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_4X2 0x00000300
+#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_4X2_D3D 0x00000400
+#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_2X1_D3D 0x00000500
+#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_4X4 0x00000600
+#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_2X2_VC_4 0x00000800
+#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_2X2_VC_12 0x00000900
+#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_4X2_VC_8 0x00000a00
+#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_4X2_VC_24 0x00000b00
+#define GM107_TIC2_7_MIN_LOD_CLAMP__MASK 0x00fff000
+#define GM107_TIC2_7_MIN_LOD_CLAMP__SHIFT 12
+#define GM107_TIC2_7_MIN_LOD_CLAMP__RADIX 0x00000008
+
+
+#endif /* GM107_TEXTURE_XML */
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/Makefile b/src/gallium/drivers/nouveau/nvc0/mme/Makefile
index 1c0f5835973..52fb0a54812 100644
--- a/src/gallium/drivers/nouveau/nvc0/mme/Makefile
+++ b/src/gallium/drivers/nouveau/nvc0/mme/Makefile
@@ -1,5 +1,5 @@
ENVYAS?=envyas
-TARGETS=com9097.mme.h
+TARGETS=com9097.mme.h com90c0.mme.h
all: $(TARGETS)
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme
new file mode 100644
index 00000000000..a9233ad8015
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme
@@ -0,0 +1,24 @@
+/* NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT
+ *
+ * arg = num_groups_x
+ * parm[0] = num_groups_y
+ * parm[1] = num_groups_z
+ */
+.section #mme90c0_launch_grid_indirect
+ parm $r2 maddr 0x108e /* GRIDDIM_YX */
+ braz $r1 #fail
+ parm $r3
+ braz annul $r2 #fail
+ braz annul $r3 #fail
+ send (extrinsrt $r1 $r2 0x0 0x10 0x10) /* num_groups_y << 16 | num_groups_x */
+ send $r3
+ maddrsend 0xa7 /* COMPUTE_BEGIN */
+ maddrsend 0x282 /* UNKA08 */
+ maddr 0xda /* LAUNCH */
+ send 0x1000
+ maddrsend 0x281 /* COMPUTE_END */
+ exit maddr 0xd8 /* UNK360 */
+ send 0x1
+fail:
+ exit
+ nop
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h
new file mode 100644
index 00000000000..1dc06e5e690
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h
@@ -0,0 +1,19 @@
+uint32_t mme90c0_launch_grid_indirect[] = {
+ 0x04238251,
+ 0x00034807,
+ 0x00000301,
+/* 0x000e: fail */
+ 0x0002d027,
+ 0x00029827,
+ 0x84008842,
+ 0x00001841,
+ 0x0029c071,
+ 0x00a08071,
+ 0x00368021,
+ 0x04000041,
+ 0x00a04071,
+ 0x003600a1,
+ 0x00004041,
+ 0x00000091,
+ 0x00000011,
+};
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
index 71804343138..0f1265f5db5 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
@@ -59,53 +59,63 @@ nvc0_screen_compute_setup(struct nvc0_screen *screen,
if (ret)
return ret;
- BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
+ BEGIN_NVC0(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1);
PUSH_DATA (push, screen->compute->oclass);
/* hardware limit */
- BEGIN_NVC0(push, NVC0_COMPUTE(MP_LIMIT), 1);
+ BEGIN_NVC0(push, NVC0_CP(MP_LIMIT), 1);
PUSH_DATA (push, screen->mp_count);
- BEGIN_NVC0(push, NVC0_COMPUTE(CALL_LIMIT_LOG), 1);
+ BEGIN_NVC0(push, NVC0_CP(CALL_LIMIT_LOG), 1);
PUSH_DATA (push, 0xf);
- BEGIN_NVC0(push, SUBC_COMPUTE(0x02a0), 1);
+ BEGIN_NVC0(push, SUBC_CP(0x02a0), 1);
PUSH_DATA (push, 0x8000);
/* global memory setup */
- BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1);
+ BEGIN_NVC0(push, SUBC_CP(0x02c4), 1);
PUSH_DATA (push, 0);
- BEGIN_NIC0(push, NVC0_COMPUTE(GLOBAL_BASE), 0x100);
+ BEGIN_NIC0(push, NVC0_CP(GLOBAL_BASE), 0x100);
for (i = 0; i <= 0xff; i++)
PUSH_DATA (push, (0xc << 28) | (i << 16) | i);
- BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1);
+ BEGIN_NVC0(push, SUBC_CP(0x02c4), 1);
PUSH_DATA (push, 1);
/* local memory and cstack setup */
- BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_ADDRESS_HIGH), 2);
+ BEGIN_NVC0(push, NVC0_CP(TEMP_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->tls->offset);
PUSH_DATA (push, screen->tls->offset);
- BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_SIZE_HIGH), 2);
+ BEGIN_NVC0(push, NVC0_CP(TEMP_SIZE_HIGH), 2);
PUSH_DATAh(push, screen->tls->size);
PUSH_DATA (push, screen->tls->size);
- BEGIN_NVC0(push, NVC0_COMPUTE(WARP_TEMP_ALLOC), 1);
+ BEGIN_NVC0(push, NVC0_CP(WARP_TEMP_ALLOC), 1);
PUSH_DATA (push, 0);
- BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_BASE), 1);
- PUSH_DATA (push, 1 << 24);
+ BEGIN_NVC0(push, NVC0_CP(LOCAL_BASE), 1);
+ PUSH_DATA (push, 0xff << 24);
/* shared memory setup */
- BEGIN_NVC0(push, NVC0_COMPUTE(CACHE_SPLIT), 1);
+ BEGIN_NVC0(push, NVC0_CP(CACHE_SPLIT), 1);
PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1);
- BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_BASE), 1);
- PUSH_DATA (push, 2 << 24);
- BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 1);
+ BEGIN_NVC0(push, NVC0_CP(SHARED_BASE), 1);
+ PUSH_DATA (push, 0xfe << 24);
+ BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 1);
PUSH_DATA (push, 0);
/* code segment setup */
- BEGIN_NVC0(push, NVC0_COMPUTE(CODE_ADDRESS_HIGH), 2);
+ BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->text->offset);
PUSH_DATA (push, screen->text->offset);
- /* TODO: textures & samplers */
+ /* textures */
+ BEGIN_NVC0(push, NVC0_CP(TIC_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->txc->offset);
+ PUSH_DATA (push, screen->txc->offset);
+ PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1);
+
+ /* samplers */
+ BEGIN_NVC0(push, NVC0_CP(TSC_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->txc->offset + 65536);
+ PUSH_DATA (push, screen->txc->offset + 65536);
+ PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1);
return 0;
}
@@ -130,7 +140,7 @@ nvc0_compute_validate_program(struct nvc0_context *nvc0)
if (likely(prog->code_size)) {
if (nvc0_program_upload_code(nvc0, prog)) {
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
- BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
+ BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CODE);
return true;
}
@@ -138,13 +148,149 @@ nvc0_compute_validate_program(struct nvc0_context *nvc0)
return false;
}
+static void /* Validate sampler (TSC) state for compute and flush it on the compute channel if needed. */
+nvc0_compute_validate_samplers(struct nvc0_context *nvc0)
+{
+ bool need_flush = nvc0_validate_tsc(nvc0, 5); /* 5: same index the sibling compute validators name `s` */
+ if (need_flush) { /* only emit TSC_FLUSH when nvc0_validate_tsc() returned true */
+ BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TSC_FLUSH), 1);
+ PUSH_DATA (nvc0->base.pushbuf, 0);
+ }
+}
+
+static void /* Validate texture (TIC) state for compute and flush it on the compute channel if needed. */
+nvc0_compute_validate_textures(struct nvc0_context *nvc0)
+{
+ bool need_flush = nvc0_validate_tic(nvc0, 5); /* 5: same index the sibling compute validators name `s` */
+ if (need_flush) { /* only emit TIC_FLUSH when nvc0_validate_tic() returned true */
+ BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TIC_FLUSH), 1);
+ PUSH_DATA (nvc0->base.pushbuf, 0);
+ }
+}
+
+static void /* Re-emit every dirty constant-buffer binding of slot s on the compute channel, then flush CB. */
+nvc0_compute_validate_constbufs(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ const int s = 5; /* index into the per-stage arrays used for compute in this function */
+
+ while (nvc0->constbuf_dirty[s]) { /* one iteration per set bit in the dirty mask */
+ int i = ffs(nvc0->constbuf_dirty[s]) - 1; /* index of the lowest set dirty bit */
+ nvc0->constbuf_dirty[s] &= ~(1 << i); /* clear it so the loop makes progress */
+
+ if (nvc0->constbuf[s][i].user) { /* user-pointer constbuf: contents are pushed into screen->uniform_bo */
+ struct nouveau_bo *bo = nvc0->screen->uniform_bo;
+ const unsigned base = s << 16; /* offset of this slot's region inside uniform_bo */
+ const unsigned size = nvc0->constbuf[s][0].size;
+ assert(i == 0); /* we really only want OpenGL uniforms here */
+ assert(nvc0->constbuf[s][0].u.data);
+
+ if (nvc0->state.uniform_buffer_bound[s] < size) { /* rebind only when the recorded bound size is too small */
+ nvc0->state.uniform_buffer_bound[s] = align(size, 0x100);
+
+ BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); /* 3 words: size, address high, address low */
+ PUSH_DATA (push, nvc0->state.uniform_buffer_bound[s]);
+ PUSH_DATAh(push, bo->offset + base);
+ PUSH_DATA (push, bo->offset + base);
+ BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
+ PUSH_DATA (push, (0 << 8) | 1); /* NOTE(review): looks like (index << 8) | valid bit, cf. the (i << 8) forms below — confirm */
+ }
+ nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
+ base, nvc0->state.uniform_buffer_bound[s],
+ 0, (size + 3) / 4, /* size / 4 rounded up; presumably a count of 32-bit words — confirm */
+ nvc0->constbuf[s][0].u.data);
+ } else { /* resource-backed (or unbound) constbuf */
+ struct nv04_resource *res =
+ nv04_resource(nvc0->constbuf[s][i].u.buf);
+ if (res) {
+ BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); /* 3 words: size, address high, address low */
+ PUSH_DATA (push, nvc0->constbuf[s][i].size);
+ PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset);
+ PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset);
+ BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
+ PUSH_DATA (push, (i << 8) | 1);
+
+ BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD); /* reference the resource in the compute bufctx, read-only */
+
+ res->cb_bindings[s] |= 1 << i; /* record on the resource that it is bound as constbuf i of slot s */
+ } else { /* no resource: emit CB_BIND for index i with the low bit cleared */
+ BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
+ PUSH_DATA (push, (i << 8) | 0);
+ }
+ if (i == 0) /* reset so a later user constbuf in slot 0 passes the `< size` check and rebinds */
+ nvc0->state.uniform_buffer_bound[s] = 0;
+ }
+ }
+
+ BEGIN_NVC0(push, NVC0_CP(FLUSH), 1); /* emitted unconditionally once the dirty mask is empty */
+ PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
+}
+
+static void /* Bind a 1024-byte window of screen->uniform_bo on the compute channel and mark 3D driver constants dirty. */
+nvc0_compute_validate_driverconst(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_screen *screen = nvc0->screen;
+
+ BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); /* 3 words: size, address high, address low */
+ PUSH_DATA (push, 1024);
+ PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (5 << 10)); /* same address nvc0_compute_validate_buffers() computes with s = 5 */
+ PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (5 << 10));
+ BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
+ PUSH_DATA (push, (15 << 8) | 1); /* NOTE(review): presumably binds the window as constbuf index 15 — confirm against CB_BIND layout */
+
+ nvc0->dirty_3d |= NVC0_NEW_3D_DRIVERCONST; /* NOTE(review): presumably makes the 3D path re-validate its own driver constbuf — confirm */
+}
+
+static void /* Upload a 4-word record per buffer slot of slot set s into the uniform_bo window, starting at position 512. */
+nvc0_compute_validate_buffers(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ const int s = 5; /* index into the per-stage arrays used for compute in this function */
+ int i;
+
+ BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); /* 3 words: size, address high, address low */
+ PUSH_DATA (push, 1024);
+ PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10));
+ PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10));
+ BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS); /* 1 position word + 4 words per buffer slot */
+ PUSH_DATA (push, 512); /* starting position inside the window; presumably a byte offset — confirm */
+
+ for (i = 0; i < NVC0_MAX_BUFFERS; i++) { /* every slot emits exactly 4 words, bound or not */
+ if (nvc0->buffers[s][i].buffer) {
+ struct nv04_resource *res =
+ nv04_resource(nvc0->buffers[s][i].buffer);
+ PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset); /* low word first here, unlike the CB_SIZE address pair above */
+ PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset);
+ PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
+ PUSH_DATA (push, 0); /* fourth word of the record is always 0 */
+ BCTX_REFN(nvc0->bufctx_cp, CP_BUF, res, RDWR); /* reference the resource in the compute bufctx, read/write */
+ } else { /* unbound slot: four zero words keep the per-slot layout fixed */
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ }
+ }
+}
+
static bool
nvc0_compute_state_validate(struct nvc0_context *nvc0)
{
if (!nvc0_compute_validate_program(nvc0))
return false;
-
- /* TODO: textures, samplers, surfaces, global memory buffers */
+ if (nvc0->dirty_cp & NVC0_NEW_CP_CONSTBUF)
+ nvc0_compute_validate_constbufs(nvc0);
+ if (nvc0->dirty_cp & NVC0_NEW_CP_DRIVERCONST)
+ nvc0_compute_validate_driverconst(nvc0);
+ if (nvc0->dirty_cp & NVC0_NEW_CP_BUFFERS)
+ nvc0_compute_validate_buffers(nvc0);
+ if (nvc0->dirty_cp & NVC0_NEW_CP_TEXTURES)
+ nvc0_compute_validate_textures(nvc0);
+ if (nvc0->dirty_cp & NVC0_NEW_CP_SAMPLERS)
+ nvc0_compute_validate_samplers(nvc0);
+
+ /* TODO: surfaces, global memory buffers */
nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, false);
@@ -166,32 +312,29 @@ nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input)
struct nvc0_program *cp = nvc0->compprog;
if (cp->parm_size) {
- BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
+ BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
PUSH_DATA (push, align(cp->parm_size, 0x100));
PUSH_DATAh(push, screen->parm->offset);
PUSH_DATA (push, screen->parm->offset);
- BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
+ BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
PUSH_DATA (push, (0 << 8) | 1);
/* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */
- BEGIN_1IC0(push, NVC0_COMPUTE(CB_POS), 1 + cp->parm_size / 4);
+ BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + cp->parm_size / 4);
PUSH_DATA (push, 0);
PUSH_DATAp(push, input, cp->parm_size / 4);
- BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
+ BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
}
}
void
-nvc0_launch_grid(struct pipe_context *pipe,
- const uint *block_layout, const uint *grid_layout,
- uint32_t label,
- const void *input)
+nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nvc0_program *cp = nvc0->compprog;
- unsigned s, i;
+ unsigned s;
int ret;
ret = !nvc0_compute_state_validate(nvc0);
@@ -200,59 +343,69 @@ nvc0_launch_grid(struct pipe_context *pipe,
return;
}
- nvc0_compute_upload_input(nvc0, input);
+ nvc0_compute_upload_input(nvc0, info->input);
- BEGIN_NVC0(push, NVC0_COMPUTE(CP_START_ID), 1);
- PUSH_DATA (push, nvc0_program_symbol_offset(cp, label));
+ BEGIN_NVC0(push, NVC0_CP(CP_START_ID), 1);
+ PUSH_DATA (push, nvc0_program_symbol_offset(cp, info->pc));
- BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_POS_ALLOC), 3);
+ BEGIN_NVC0(push, NVC0_CP(LOCAL_POS_ALLOC), 3);
PUSH_DATA (push, align(cp->cp.lmem_size, 0x10));
PUSH_DATA (push, 0);
PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */
- BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 3);
+ BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 3);
PUSH_DATA (push, align(cp->cp.smem_size, 0x100));
- PUSH_DATA (push, block_layout[0] * block_layout[1] * block_layout[2]);
+ PUSH_DATA (push, info->block[0] * info->block[1] * info->block[2]);
PUSH_DATA (push, cp->num_barriers);
- BEGIN_NVC0(push, NVC0_COMPUTE(CP_GPR_ALLOC), 1);
+ BEGIN_NVC0(push, NVC0_CP(CP_GPR_ALLOC), 1);
PUSH_DATA (push, cp->num_gprs);
- /* grid/block setup */
- BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2);
- PUSH_DATA (push, (grid_layout[1] << 16) | grid_layout[0]);
- PUSH_DATA (push, grid_layout[2]);
- BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2);
- PUSH_DATA (push, (block_layout[1] << 16) | block_layout[0]);
- PUSH_DATA (push, block_layout[2]);
-
/* launch preliminary setup */
- BEGIN_NVC0(push, NVC0_COMPUTE(GRIDID), 1);
+ BEGIN_NVC0(push, NVC0_CP(GRIDID), 1);
PUSH_DATA (push, 0x1);
- BEGIN_NVC0(push, SUBC_COMPUTE(0x036c), 1);
+ BEGIN_NVC0(push, SUBC_CP(0x036c), 1);
PUSH_DATA (push, 0);
- BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
+ BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8);
- /* kernel launching */
- BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1);
- PUSH_DATA (push, 0);
- BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1);
- PUSH_DATA (push, 0);
- BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1);
- PUSH_DATA (push, 0x1000);
- BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1);
- PUSH_DATA (push, 0);
- BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1);
- PUSH_DATA (push, 0x1);
+ /* block setup */
+ BEGIN_NVC0(push, NVC0_CP(BLOCKDIM_YX), 2);
+ PUSH_DATA (push, (info->block[1] << 16) | info->block[0]);
+ PUSH_DATA (push, info->block[2]);
+
+ if (unlikely(info->indirect)) {
+ struct nv04_resource *res = nv04_resource(info->indirect);
+ uint32_t offset = res->offset + info->indirect_offset;
+ unsigned macro = NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT;
+
+ nouveau_pushbuf_space(push, 16, 0, 1);
+ PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);
+ PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(1, macro, 3));
+ nouveau_pushbuf_data(push, res->bo, offset,
+ NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);
+ } else {
+ /* grid setup */
+ BEGIN_NVC0(push, NVC0_CP(GRIDDIM_YX), 2);
+ PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]);
+ PUSH_DATA (push, info->grid[2]);
+
+ /* kernel launching */
+ BEGIN_NVC0(push, NVC0_CP(COMPUTE_BEGIN), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, SUBC_CP(0x0a08), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_CP(LAUNCH), 1);
+ PUSH_DATA (push, 0x1000);
+ BEGIN_NVC0(push, NVC0_CP(COMPUTE_END), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, SUBC_CP(0x0360), 1);
+ PUSH_DATA (push, 0x1);
+ }
- /* rebind all the 3D constant buffers
- * (looks like binding a CB on COMPUTE clobbers 3D state) */
- nvc0->dirty |= NVC0_NEW_CONSTBUF;
+ /* Invalidate all 3D constbufs because they are aliased with COMPUTE. */
+ nvc0->dirty_3d |= NVC0_NEW_3D_CONSTBUF;
for (s = 0; s < 5; s++) {
- for (i = 0; i < NVC0_MAX_PIPE_CONSTBUFS; i++)
- if (nvc0->constbuf[s][i].u.buf)
- nvc0->constbuf_dirty[s] |= 1 << i;
+ nvc0->constbuf_dirty[s] |= nvc0->constbuf_valid[s];
+ nvc0->state.uniform_buffer_bound[s] = 0;
}
- memset(nvc0->state.uniform_buffer_bound, 0,
- sizeof(nvc0->state.uniform_buffer_bound));
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h
index 168a6d1bee2..a23f7f39dda 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h
@@ -1,7 +1,6 @@
#ifndef NVC0_COMPUTE_H
#define NVC0_COMPUTE_H
-#include "nv50/nv50_defs.xml.h"
#include "nvc0/nvc0_compute.xml.h"
bool
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
index 547b8f5d309..007cccfd10b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
@@ -194,8 +194,8 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
for (i = 0; i < nvc0->framebuffer.nr_cbufs; ++i) {
if (nvc0->framebuffer.cbufs[i] &&
nvc0->framebuffer.cbufs[i]->texture == res) {
- nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB);
+ nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB);
if (!--ref)
return ref;
}
@@ -204,8 +204,8 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
if (res->bind & PIPE_BIND_DEPTH_STENCIL) {
if (nvc0->framebuffer.zsbuf &&
nvc0->framebuffer.zsbuf->texture == res) {
- nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB);
+ nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB);
if (!--ref)
return ref;
}
@@ -214,16 +214,16 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
if (res->target == PIPE_BUFFER) {
for (i = 0; i < nvc0->num_vtxbufs; ++i) {
if (nvc0->vtxbuf[i].buffer == res) {
- nvc0->dirty |= NVC0_NEW_ARRAYS;
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX);
+ nvc0->dirty_3d |= NVC0_NEW_3D_ARRAYS;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_VTX);
if (!--ref)
return ref;
}
}
if (nvc0->idxbuf.buffer == res) {
- nvc0->dirty |= NVC0_NEW_IDXBUF;
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_IDX);
+ nvc0->dirty_3d |= NVC0_NEW_3D_IDXBUF;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_IDX);
if (!--ref)
return ref;
}
@@ -233,35 +233,45 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
if (nvc0->textures[s][i] &&
nvc0->textures[s][i]->texture == res) {
nvc0->textures_dirty[s] |= 1 << i;
- nvc0->dirty |= NVC0_NEW_TEXTURES;
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(s, i));
+ nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i));
if (!--ref)
return ref;
}
}
}
- for (s = 0; s < 5; ++s) {
+ for (s = 0; s < 6; ++s) {
for (i = 0; i < NVC0_MAX_PIPE_CONSTBUFS; ++i) {
if (!(nvc0->constbuf_valid[s] & (1 << i)))
continue;
if (!nvc0->constbuf[s][i].user &&
nvc0->constbuf[s][i].u.buf == res) {
- nvc0->dirty |= NVC0_NEW_CONSTBUF;
nvc0->constbuf_dirty[s] |= 1 << i;
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CB(s, i));
+ if (unlikely(s == 5)) {
+ nvc0->dirty_cp |= NVC0_NEW_CP_CONSTBUF;
+ nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_CB(i));
+ } else {
+ nvc0->dirty_3d |= NVC0_NEW_3D_CONSTBUF;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_CB(s, i));
+ }
if (!--ref)
return ref;
}
}
}
- for (s = 0; s < 5; ++s) {
+ for (s = 0; s < 6; ++s) {
for (i = 0; i < NVC0_MAX_BUFFERS; ++i) {
if (nvc0->buffers[s][i].buffer == res) {
nvc0->buffers_dirty[s] |= 1 << i;
- nvc0->dirty |= NVC0_NEW_BUFFERS;
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_BUF);
+ if (unlikely(s == 5)) {
+ nvc0->dirty_cp |= NVC0_NEW_CP_BUFFERS;
+ nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_BUF);
+ } else {
+ nvc0->dirty_3d |= NVC0_NEW_3D_BUFFERS;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_BUF);
+ }
if (!--ref)
return ref;
}
@@ -342,7 +352,12 @@ nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
if (!nvc0->tcp_empty)
goto out_err;
/* set the empty tctl prog on next draw in case one is never set */
- nvc0->dirty |= NVC0_NEW_TCTLPROG;
+ nvc0->dirty_3d |= NVC0_NEW_3D_TCTLPROG;
+
+ /* Do not bind the COMPUTE driver constbuf at screen initialization because
+ * CBs are aliased between 3D and COMPUTE, but make sure it will be bound if
+ * a grid is launched later. */
+ nvc0->dirty_cp |= NVC0_NEW_CP_DRIVERCONST;
/* now that there are no more opportunities for errors, set the current
* context if there isn't already one.
@@ -358,11 +373,12 @@ nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
flags = NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD;
- BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->text);
- BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->uniform_bo);
- BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->txc);
+ BCTX_REFN_bo(nvc0->bufctx_3d, 3D_SCREEN, flags, screen->text);
+ BCTX_REFN_bo(nvc0->bufctx_3d, 3D_SCREEN, flags, screen->uniform_bo);
+ BCTX_REFN_bo(nvc0->bufctx_3d, 3D_SCREEN, flags, screen->txc);
if (screen->compute) {
BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->text);
+ BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->uniform_bo);
BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->txc);
BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->parm);
}
@@ -370,13 +386,13 @@ nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
flags = NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RDWR;
if (screen->poly_cache)
- BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->poly_cache);
+ BCTX_REFN_bo(nvc0->bufctx_3d, 3D_SCREEN, flags, screen->poly_cache);
if (screen->compute)
BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->tls);
flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR;
- BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->fence.bo);
+ BCTX_REFN_bo(nvc0->bufctx_3d, 3D_SCREEN, flags, screen->fence.bo);
BCTX_REFN_bo(nvc0->bufctx, FENCE, flags, screen->fence.bo);
if (screen->compute)
BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->fence.bo);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 4a6ea867e85..d3e3a818910 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -29,34 +29,35 @@
#include "nvc0/nve4_p2mf.xml.h"
#include "nvc0/nvc0_macros.h"
-/* NOTE: must keep NVC0_NEW_...PROG in consecutive bits in this order */
-#define NVC0_NEW_BLEND (1 << 0)
-#define NVC0_NEW_RASTERIZER (1 << 1)
-#define NVC0_NEW_ZSA (1 << 2)
-#define NVC0_NEW_VERTPROG (1 << 3)
-#define NVC0_NEW_TCTLPROG (1 << 4)
-#define NVC0_NEW_TEVLPROG (1 << 5)
-#define NVC0_NEW_GMTYPROG (1 << 6)
-#define NVC0_NEW_FRAGPROG (1 << 7)
-#define NVC0_NEW_BLEND_COLOUR (1 << 8)
-#define NVC0_NEW_STENCIL_REF (1 << 9)
-#define NVC0_NEW_CLIP (1 << 10)
-#define NVC0_NEW_SAMPLE_MASK (1 << 11)
-#define NVC0_NEW_FRAMEBUFFER (1 << 12)
-#define NVC0_NEW_STIPPLE (1 << 13)
-#define NVC0_NEW_SCISSOR (1 << 14)
-#define NVC0_NEW_VIEWPORT (1 << 15)
-#define NVC0_NEW_ARRAYS (1 << 16)
-#define NVC0_NEW_VERTEX (1 << 17)
-#define NVC0_NEW_CONSTBUF (1 << 18)
-#define NVC0_NEW_TEXTURES (1 << 19)
-#define NVC0_NEW_SAMPLERS (1 << 20)
-#define NVC0_NEW_TFB_TARGETS (1 << 21)
-#define NVC0_NEW_IDXBUF (1 << 22)
-#define NVC0_NEW_SURFACES (1 << 23)
-#define NVC0_NEW_MIN_SAMPLES (1 << 24)
-#define NVC0_NEW_TESSFACTOR (1 << 25)
-#define NVC0_NEW_BUFFERS (1 << 26)
+/* NOTE: must keep NVC0_NEW_3D_...PROG in consecutive bits in this order */
+#define NVC0_NEW_3D_BLEND (1 << 0)
+#define NVC0_NEW_3D_RASTERIZER (1 << 1)
+#define NVC0_NEW_3D_ZSA (1 << 2)
+#define NVC0_NEW_3D_VERTPROG (1 << 3)
+#define NVC0_NEW_3D_TCTLPROG (1 << 4)
+#define NVC0_NEW_3D_TEVLPROG (1 << 5)
+#define NVC0_NEW_3D_GMTYPROG (1 << 6)
+#define NVC0_NEW_3D_FRAGPROG (1 << 7)
+#define NVC0_NEW_3D_BLEND_COLOUR (1 << 8)
+#define NVC0_NEW_3D_STENCIL_REF (1 << 9)
+#define NVC0_NEW_3D_CLIP (1 << 10)
+#define NVC0_NEW_3D_SAMPLE_MASK (1 << 11)
+#define NVC0_NEW_3D_FRAMEBUFFER (1 << 12)
+#define NVC0_NEW_3D_STIPPLE (1 << 13)
+#define NVC0_NEW_3D_SCISSOR (1 << 14)
+#define NVC0_NEW_3D_VIEWPORT (1 << 15)
+#define NVC0_NEW_3D_ARRAYS (1 << 16)
+#define NVC0_NEW_3D_VERTEX (1 << 17)
+#define NVC0_NEW_3D_CONSTBUF (1 << 18)
+#define NVC0_NEW_3D_TEXTURES (1 << 19)
+#define NVC0_NEW_3D_SAMPLERS (1 << 20)
+#define NVC0_NEW_3D_TFB_TARGETS (1 << 21)
+#define NVC0_NEW_3D_IDXBUF (1 << 22)
+#define NVC0_NEW_3D_SURFACES (1 << 23)
+#define NVC0_NEW_3D_MIN_SAMPLES (1 << 24)
+#define NVC0_NEW_3D_TESSFACTOR (1 << 25)
+#define NVC0_NEW_3D_BUFFERS (1 << 26)
+#define NVC0_NEW_3D_DRIVERCONST (1 << 27)
#define NVC0_NEW_CP_PROGRAM (1 << 0)
#define NVC0_NEW_CP_SURFACES (1 << 1)
@@ -64,20 +65,22 @@
#define NVC0_NEW_CP_SAMPLERS (1 << 3)
#define NVC0_NEW_CP_CONSTBUF (1 << 4)
#define NVC0_NEW_CP_GLOBALS (1 << 5)
+#define NVC0_NEW_CP_DRIVERCONST (1 << 6)
+#define NVC0_NEW_CP_BUFFERS (1 << 7)
/* 3d bufctx (during draw_vbo, blit_3d) */
-#define NVC0_BIND_FB 0
-#define NVC0_BIND_VTX 1
-#define NVC0_BIND_VTX_TMP 2
-#define NVC0_BIND_IDX 3
-#define NVC0_BIND_TEX(s, i) ( 4 + 32 * (s) + (i))
-#define NVC0_BIND_CB(s, i) (164 + 16 * (s) + (i))
-#define NVC0_BIND_TFB 244
-#define NVC0_BIND_SUF 245
-#define NVC0_BIND_BUF 246
-#define NVC0_BIND_SCREEN 247
-#define NVC0_BIND_TLS 249
-#define NVC0_BIND_3D_COUNT 250
+#define NVC0_BIND_3D_FB 0
+#define NVC0_BIND_3D_VTX 1
+#define NVC0_BIND_3D_VTX_TMP 2
+#define NVC0_BIND_3D_IDX 3
+#define NVC0_BIND_3D_TEX(s, i) ( 4 + 32 * (s) + (i))
+#define NVC0_BIND_3D_CB(s, i) (164 + 16 * (s) + (i))
+#define NVC0_BIND_3D_TFB 244
+#define NVC0_BIND_3D_SUF 245
+#define NVC0_BIND_3D_BUF 246
+#define NVC0_BIND_3D_SCREEN 247
+#define NVC0_BIND_3D_TLS 249
+#define NVC0_BIND_3D_COUNT 250
/* compute bufctx (during launch_grid) */
#define NVC0_BIND_CP_CB(i) ( 0 + (i))
@@ -87,7 +90,8 @@
#define NVC0_BIND_CP_DESC 50
#define NVC0_BIND_CP_SCREEN 51
#define NVC0_BIND_CP_QUERY 52
-#define NVC0_BIND_CP_COUNT 53
+#define NVC0_BIND_CP_BUF 53
+#define NVC0_BIND_CP_COUNT 54
/* bufctx for other operations */
#define NVC0_BIND_2D 0
@@ -114,7 +118,7 @@ struct nvc0_context {
const struct nv50_m2mf_rect *src,
uint32_t nblocksx, uint32_t nblocksy);
- uint32_t dirty;
+ uint32_t dirty_3d; /* dirty flags for 3d state */
uint32_t dirty_cp; /* dirty flags for compute state */
struct nvc0_graph_state state;
@@ -157,6 +161,7 @@ struct nvc0_context {
struct nv50_tsc_entry *samplers[6][PIPE_MAX_SAMPLERS];
unsigned num_samplers[6];
uint16_t samplers_dirty[6];
+ bool seamless_cube_map;
uint32_t tex_handles[6][PIPE_MAX_SAMPLERS]; /* for nve4 */
@@ -267,6 +272,8 @@ extern void nvc0_clear(struct pipe_context *, unsigned buffers,
extern void nvc0_init_surface_functions(struct nvc0_context *);
/* nvc0_tex.c */
+bool nvc0_validate_tic(struct nvc0_context *nvc0, int s);
+bool nvc0_validate_tsc(struct nvc0_context *nvc0, int s);
bool nve4_validate_tsc(struct nvc0_context *nvc0, int s);
void nvc0_validate_textures(struct nvc0_context *);
void nvc0_validate_samplers(struct nvc0_context *);
@@ -331,11 +338,9 @@ nvc0_video_buffer_create(struct pipe_context *pipe,
void nvc0_push_vbo(struct nvc0_context *, const struct pipe_draw_info *);
/* nve4_compute.c */
-void nve4_launch_grid(struct pipe_context *,
- const uint *, const uint *, uint32_t, const void *);
+void nve4_launch_grid(struct pipe_context *, const struct pipe_grid_info *);
/* nvc0_compute.c */
-void nvc0_launch_grid(struct pipe_context *,
- const uint *, const uint *, uint32_t, const void *);
+void nvc0_launch_grid(struct pipe_context *, const struct pipe_grid_info *);
#endif
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
index 49e176cbd49..eeacc714f3e 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
@@ -35,4 +35,6 @@
#define NVC0_3D_MACRO_QUERY_BUFFER_WRITE 0x00003858
+#define NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT 0x00003860
+
#endif /* __NVC0_MACROS_H__ */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 93f211bd5fc..bc884d6c08f 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -544,6 +544,9 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
info->io.texBindBase = NVE4_CP_INPUT_TEX(0);
info->io.suInfoBase = NVE4_CP_INPUT_SUF(0);
info->prop.cp.gridInfoBase = NVE4_CP_INPUT_GRID_INFO(0);
+ } else {
+ info->io.resInfoCBSlot = 15;
+ info->io.suInfoBase = 512;
}
info->io.msInfoCBSlot = 0;
info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
index 721857edecc..f5f9bb39fd9 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
@@ -846,15 +846,15 @@ nve4_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
/* configure and reset the counter(s) */
if (d == 0)
- BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_A_SIGSEL(c & 3)), 1);
+ BEGIN_NVC0(push, NVE4_CP(MP_PM_A_SIGSEL(c & 3)), 1);
else
- BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_B_SIGSEL(c & 3)), 1);
+ BEGIN_NVC0(push, NVE4_CP(MP_PM_B_SIGSEL(c & 3)), 1);
PUSH_DATA (push, cfg->ctr[i].sig_sel);
- BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SRCSEL(c)), 1);
+ BEGIN_NVC0(push, NVE4_CP(MP_PM_SRCSEL(c)), 1);
PUSH_DATA (push, cfg->ctr[i].src_sel + 0x2108421 * (c & 3));
- BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 1);
+ BEGIN_NVC0(push, NVE4_CP(MP_PM_FUNC(c)), 1);
PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
- BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SET(c)), 1);
+ BEGIN_NVC0(push, NVE4_CP(MP_PM_SET(c)), 1);
PUSH_DATA (push, 0);
}
return true;
@@ -917,13 +917,13 @@ nvc0_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
mask_sel &= cfg->ctr[i].src_mask;
/* configure and reset the counter(s) */
- BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SIGSEL(c)), 1);
+ BEGIN_NVC0(push, NVC0_CP(MP_PM_SIGSEL(c)), 1);
PUSH_DATA (push, cfg->ctr[i].sig_sel);
- BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SRCSEL(c)), 1);
+ BEGIN_NVC0(push, NVC0_CP(MP_PM_SRCSEL(c)), 1);
PUSH_DATA (push, cfg->ctr[i].src_sel | mask_sel);
- BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(c)), 1);
+ BEGIN_NVC0(push, NVC0_CP(MP_PM_OP(c)), 1);
PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
- BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SET(c)), 1);
+ BEGIN_NVC0(push, NVC0_CP(MP_PM_SET(c)), 1);
PUSH_DATA (push, 0);
}
return true;
@@ -937,11 +937,12 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
const bool is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
struct nvc0_hw_sm_query *hsq = nvc0_hw_sm_query(hq);
+ struct pipe_grid_info info = {};
uint32_t mask;
uint32_t input[3];
const uint block[3] = { 32, is_nve4 ? 4 : 1, 1 };
const uint grid[3] = { screen->mp_count, screen->gpc_count, 1 };
- unsigned c;
+ unsigned c, i;
if (unlikely(!screen->pm.prog)) {
struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program);
@@ -965,9 +966,9 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
for (c = 0; c < 8; ++c)
if (screen->pm.mp_counter[c]) {
if (is_nve4) {
- IMMED_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 0);
+ IMMED_NVC0(push, NVE4_CP(MP_PM_FUNC(c)), 0);
} else {
- IMMED_NVC0(push, NVC0_COMPUTE(MP_PM_OP(c)), 0);
+ IMMED_NVC0(push, NVC0_CP(MP_PM_OP(c)), 0);
}
}
/* release counters for this query */
@@ -983,13 +984,20 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
hq->bo);
PUSH_SPACE(push, 1);
- IMMED_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 0);
+ IMMED_NVC0(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 0);
pipe->bind_compute_state(pipe, screen->pm.prog);
input[0] = (hq->bo->offset + hq->base_offset);
input[1] = (hq->bo->offset + hq->base_offset) >> 32;
input[2] = hq->sequence;
- pipe->launch_grid(pipe, block, grid, 0, input);
+
+ for (i = 0; i < 3; i++) {
+ info.block[i] = block[i];
+ info.grid[i] = grid[i];
+ }
+ info.pc = 0;
+ info.input = input;
+ pipe->launch_grid(pipe, &info);
nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_QUERY);
@@ -1010,9 +1018,9 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
break;
mask |= 1 << hsq->ctr[i];
if (is_nve4) {
- BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(hsq->ctr[i])), 1);
+ BEGIN_NVC0(push, NVE4_CP(MP_PM_FUNC(hsq->ctr[i])), 1);
} else {
- BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(hsq->ctr[i])), 1);
+ BEGIN_NVC0(push, NVC0_CP(MP_PM_OP(hsq->ctr[i])), 1);
}
PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index d368fda707d..998e9ea47ef 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -36,6 +36,7 @@
#include "nvc0/nvc0_screen.h"
#include "nvc0/mme/com9097.mme.h"
+#include "nvc0/mme/com90c0.mme.h"
static boolean
nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
@@ -61,7 +62,8 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
PIPE_BIND_TRANSFER_WRITE |
PIPE_BIND_SHARED);
- return (nvc0_format_table[format].usage & bindings) == bindings;
+ return (( nvc0_format_table[format].usage |
+ nvc0_vertex_format[format].usage) & bindings) == bindings;
}
static int
@@ -196,6 +198,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
case PIPE_CAP_COMPUTE:
+ if (debug_get_bool_option("NVF0_COMPUTE", false))
+ return 1;
return (class_3d <= NVE4_3D_CLASS) ? 1 : 0;
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? 1 : 0;
@@ -262,8 +266,9 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
return 0;
break;
case PIPE_SHADER_COMPUTE:
- if (class_3d > NVE4_3D_CLASS)
- return 0;
+ if (!debug_get_bool_option("NVF0_COMPUTE", false))
+ if (class_3d > NVE4_3D_CLASS)
+ return 0;
break;
default:
return 0;
@@ -272,6 +277,10 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
switch (param) {
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ if (class_3d >= NVE4_3D_CLASS)
+ return 0;
+ return 1 << PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
@@ -336,6 +345,8 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
return 16; /* XXX not sure if more are really safe */
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
+ return 0;
default:
NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
return 0;
@@ -598,6 +609,9 @@ nvc0_screen_init_compute(struct nvc0_screen *screen)
case 0xf0:
case 0x100:
case 0x110:
+ if (debug_get_bool_option("NVF0_COMPUTE", false))
+ return nve4_screen_compute_setup(screen, screen->base.pushbuf);
+ case 0x120:
return 0;
default:
return -1;
@@ -660,6 +674,7 @@ nvc0_screen_create(struct nouveau_device *dev)
case 0xf0:
case 0x100:
case 0x110:
+ case 0x120:
break;
default:
return NULL;
@@ -728,6 +743,7 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATA (push, screen->nvsw->handle);
switch (dev->chipset & ~0xf) {
+ case 0x120:
case 0x110:
case 0x100:
case 0xf0:
@@ -779,6 +795,9 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATA (push, screen->fence.bo->offset + 16);
switch (dev->chipset & ~0xf) {
+ case 0x120:
+ obj_class = GM200_3D_CLASS;
+ break;
case 0x110:
obj_class = GM107_3D_CLASS;
break;
@@ -860,8 +879,7 @@ nvc0_screen_create(struct nouveau_device *dev)
BEGIN_NVC0(push, NVC0_3D(SHADE_MODEL), 1);
PUSH_DATA (push, NVC0_3D_SHADE_MODEL_SMOOTH);
if (screen->eng3d->oclass < NVE4_3D_CLASS) {
- BEGIN_NVC0(push, NVC0_3D(TEX_MISC), 1);
- PUSH_DATA (push, NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP);
+ IMMED_NVC0(push, NVC0_3D(TEX_MISC), 0);
} else {
BEGIN_NVC0(push, NVE4_3D(TEX_CB_INDEX), 1);
PUSH_DATA (push, 15);
@@ -887,7 +905,7 @@ nvc0_screen_create(struct nouveau_device *dev)
*/
nouveau_heap_init(&screen->text_heap, 0, (1 << 20) - 0x100);
- ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 12, 6 << 16, NULL,
+ ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 12, 7 << 16, NULL,
&screen->uniform_bo);
if (ret)
goto fail;
@@ -899,8 +917,8 @@ nvc0_screen_create(struct nouveau_device *dev)
/* auxiliary constants (6 user clip planes, base instance id) */
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 1024);
- PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (i << 10));
- PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (i << 10));
+ PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (i << 10));
+ PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (i << 10));
BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1);
PUSH_DATA (push, (15 << 4) | 1);
if (screen->eng3d->oclass >= NVE4_3D_CLASS) {
@@ -920,8 +938,8 @@ nvc0_screen_create(struct nouveau_device *dev)
/* return { 0.0, 0.0, 0.0, 0.0 } for out-of-bounds vtxbuf access */
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 256);
- PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 10));
- PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 10));
+ PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (6 << 10));
+ PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (6 << 10));
BEGIN_1IC0(push, NVC0_3D(CB_POS), 5);
PUSH_DATA (push, 0);
PUSH_DATAf(push, 0.0f);
@@ -929,8 +947,8 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATAf(push, 0.0f);
PUSH_DATAf(push, 0.0f);
BEGIN_NVC0(push, NVC0_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);
- PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 10));
- PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 10));
+ PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (6 << 10));
+ PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (6 << 10));
if (screen->base.drm->version >= 0x01000101) {
ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
@@ -988,6 +1006,14 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATAh(push, screen->txc->offset);
PUSH_DATA (push, screen->txc->offset);
PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1);
+ if (screen->eng3d->oclass >= GM107_3D_CLASS) {
+ screen->tic.maxwell = true;
+ if (screen->eng3d->oclass == GM107_3D_CLASS) {
+ screen->tic.maxwell =
+ debug_get_bool_option("NOUVEAU_MAXWELL_TIC", true);
+ IMMED_NVC0(push, SUBC_3D(0x0f10), screen->tic.maxwell);
+ }
+ }
BEGIN_NVC0(push, NVC0_3D(TSC_ADDRESS_HIGH), 3);
PUSH_DATAh(push, screen->txc->offset + 65536);
@@ -1051,6 +1077,7 @@ nvc0_screen_create(struct nouveau_device *dev)
MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count);
MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count);
MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write);
+ MK_MACRO(NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT, mme90c0_launch_grid_indirect);
BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1);
PUSH_DATA (push, 1);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
index 1a56177815c..8487abcf999 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
@@ -51,8 +51,9 @@ struct nvc0_graph_state {
uint8_t c14_bound; /* whether immediate array constbuf is bound */
uint8_t clip_enable;
uint32_t clip_mode;
- uint32_t uniform_buffer_bound[5];
+ uint32_t uniform_buffer_bound[6];
struct nvc0_transform_feedback_state *tfb;
+ bool seamless_cube_map;
};
struct nvc0_screen {
@@ -83,6 +84,7 @@ struct nvc0_screen {
void **entries;
int next;
uint32_t lock[NVC0_TIC_MAX_ENTRIES / 32];
+ bool maxwell;
} tic;
struct {
@@ -164,12 +166,27 @@ nvc0_resource_validate(struct nv04_resource *res, uint32_t flags)
struct nvc0_format {
uint32_t rt;
- uint32_t tic;
+ struct {
+ unsigned format:7;
+ unsigned type_r:3;
+ unsigned type_g:3;
+ unsigned type_b:3;
+ unsigned type_a:3;
+ unsigned src_x:3;
+ unsigned src_y:3;
+ unsigned src_z:3;
+ unsigned src_w:3;
+ } tic;
+ uint32_t usage;
+};
+
+struct nvc0_vertex_format {
uint32_t vtx;
uint32_t usage;
};
extern const struct nvc0_format nvc0_format_table[];
+extern const struct nvc0_vertex_format nvc0_vertex_format[];
static inline void
nvc0_screen_tic_unlock(struct nvc0_screen *screen, struct nv50_tic_entry *tic)
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
index 382a18ef153..2f46c436a4c 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
@@ -37,11 +37,11 @@ nvc0_program_update_context_state(struct nvc0_context *nvc0,
if (prog && prog->need_tls) {
const uint32_t flags = NV_VRAM_DOMAIN(&nvc0->screen->base) | NOUVEAU_BO_RDWR;
if (!nvc0->state.tls_required)
- BCTX_REFN_bo(nvc0->bufctx_3d, TLS, flags, nvc0->screen->tls);
+ BCTX_REFN_bo(nvc0->bufctx_3d, 3D_TLS, flags, nvc0->screen->tls);
nvc0->state.tls_required |= 1 << stage;
} else {
if (nvc0->state.tls_required == (1 << stage))
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TLS);
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TLS);
nvc0->state.tls_required &= ~(1 << stage);
}
@@ -152,7 +152,7 @@ nvc0_fragprog_validate(struct nvc0_context *nvc0)
NVC0_3D_SHADE_MODEL_SMOOTH);
}
- if (fp->mem && !(nvc0->dirty & NVC0_NEW_FRAGPROG)) {
+ if (fp->mem && !(nvc0->dirty_3d & NVC0_NEW_3D_FRAGPROG)) {
return;
}
@@ -292,9 +292,9 @@ nvc0_tfb_validate(struct nvc0_context *nvc0)
}
nvc0->state.tfb = tfb;
- if (!(nvc0->dirty & NVC0_NEW_TFB_TARGETS))
+ if (!(nvc0->dirty_3d & NVC0_NEW_3D_TFB_TARGETS))
return;
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TFB);
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TFB);
for (b = 0; b < nvc0->num_tfbbufs; ++b) {
struct nvc0_so_target *targ = nvc0_so_target(nvc0->tfbbuf[b]);
@@ -310,7 +310,7 @@ nvc0_tfb_validate(struct nvc0_context *nvc0)
buf = nv04_resource(targ->pipe.buffer);
- BCTX_REFN(nvc0->bufctx_3d, TFB, buf, WR);
+ BCTX_REFN(nvc0->bufctx_3d, 3D_TFB, buf, WR);
if (!(nvc0->tfbbuf_dirty & (1 << b)))
continue;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
index cf3d3497c78..7ccce9ff6bf 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -21,6 +21,7 @@
*/
#include "pipe/p_defines.h"
+#include "util/u_framebuffer.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
#include "util/u_transfer.h"
@@ -32,7 +33,6 @@
#include "nvc0/nvc0_query_hw.h"
#include "nvc0/nvc0_3d.xml.h"
-#include "nv50/nv50_texture.xml.h"
#include "nouveau_gldefs.h"
@@ -186,7 +186,7 @@ nvc0_blend_state_bind(struct pipe_context *pipe, void *hwcso)
struct nvc0_context *nvc0 = nvc0_context(pipe);
nvc0->blend = hwcso;
- nvc0->dirty |= NVC0_NEW_BLEND;
+ nvc0->dirty_3d |= NVC0_NEW_3D_BLEND;
}
static void
@@ -315,7 +315,7 @@ nvc0_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
struct nvc0_context *nvc0 = nvc0_context(pipe);
nvc0->rast = hwcso;
- nvc0->dirty |= NVC0_NEW_RASTERIZER;
+ nvc0->dirty_3d |= NVC0_NEW_3D_RASTERIZER;
}
static void
@@ -393,7 +393,7 @@ nvc0_zsa_state_bind(struct pipe_context *pipe, void *hwcso)
struct nvc0_context *nvc0 = nvc0_context(pipe);
nvc0->zsa = hwcso;
- nvc0->dirty |= NVC0_NEW_ZSA;
+ nvc0->dirty_3d |= NVC0_NEW_3D_ZSA;
}
static void
@@ -449,7 +449,7 @@ nvc0_stage_sampler_states_bind(struct nvc0_context *nvc0, int s,
nvc0->num_samplers[s] = nr;
- nvc0->dirty |= NVC0_NEW_SAMPLERS;
+ nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLERS;
}
static void
@@ -566,7 +566,7 @@ nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s,
}
if (old) {
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(s, i));
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i));
nvc0_screen_tic_unlock(nvc0->screen, old);
}
@@ -576,7 +576,7 @@ nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s,
for (i = nr; i < nvc0->num_textures[s]; ++i) {
struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]);
if (old) {
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(s, i));
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i));
nvc0_screen_tic_unlock(nvc0->screen, old);
pipe_sampler_view_reference(&nvc0->textures[s][i], NULL);
}
@@ -584,7 +584,7 @@ nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s,
nvc0->num_textures[s] = nr;
- nvc0->dirty |= NVC0_NEW_TEXTURES;
+ nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES;
}
static void
@@ -594,7 +594,7 @@ nvc0_stage_set_sampler_views_range(struct nvc0_context *nvc0, const unsigned s,
{
struct nouveau_bufctx *bctx = (s == 5) ? nvc0->bufctx_cp : nvc0->bufctx_3d;
const unsigned end = start + nr;
- const unsigned bin = (s == 5) ? NVC0_BIND_CP_TEX(0) : NVC0_BIND_TEX(s, 0);
+ const unsigned bin = (s == 5) ? NVC0_BIND_CP_TEX(0) : NVC0_BIND_3D_TEX(s, 0);
int last_valid = -1;
unsigned i;
@@ -733,7 +733,7 @@ nvc0_vp_state_bind(struct pipe_context *pipe, void *hwcso)
struct nvc0_context *nvc0 = nvc0_context(pipe);
nvc0->vertprog = hwcso;
- nvc0->dirty |= NVC0_NEW_VERTPROG;
+ nvc0->dirty_3d |= NVC0_NEW_3D_VERTPROG;
}
static void *
@@ -749,7 +749,7 @@ nvc0_fp_state_bind(struct pipe_context *pipe, void *hwcso)
struct nvc0_context *nvc0 = nvc0_context(pipe);
nvc0->fragprog = hwcso;
- nvc0->dirty |= NVC0_NEW_FRAGPROG;
+ nvc0->dirty_3d |= NVC0_NEW_3D_FRAGPROG;
}
static void *
@@ -765,7 +765,7 @@ nvc0_gp_state_bind(struct pipe_context *pipe, void *hwcso)
struct nvc0_context *nvc0 = nvc0_context(pipe);
nvc0->gmtyprog = hwcso;
- nvc0->dirty |= NVC0_NEW_GMTYPROG;
+ nvc0->dirty_3d |= NVC0_NEW_3D_GMTYPROG;
}
static void *
@@ -781,7 +781,7 @@ nvc0_tcp_state_bind(struct pipe_context *pipe, void *hwcso)
struct nvc0_context *nvc0 = nvc0_context(pipe);
nvc0->tctlprog = hwcso;
- nvc0->dirty |= NVC0_NEW_TCTLPROG;
+ nvc0->dirty_3d |= NVC0_NEW_3D_TCTLPROG;
}
static void *
@@ -797,7 +797,7 @@ nvc0_tep_state_bind(struct pipe_context *pipe, void *hwcso)
struct nvc0_context *nvc0 = nvc0_context(pipe);
nvc0->tevlprog = hwcso;
- nvc0->dirty |= NVC0_NEW_TEVLPROG;
+ nvc0->dirty_3d |= NVC0_NEW_3D_TEVLPROG;
}
static void *
@@ -839,7 +839,9 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
const unsigned i = index;
if (unlikely(shader == PIPE_SHADER_COMPUTE)) {
- assert(!cb || !cb->user_buffer);
+ if (nvc0->constbuf[s][i].user)
+ nvc0->constbuf[s][i].u.buf = NULL;
+ else
if (nvc0->constbuf[s][i].u.buf)
nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_CB(i));
@@ -849,9 +851,9 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
nvc0->constbuf[s][i].u.buf = NULL;
else
if (nvc0->constbuf[s][i].u.buf)
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CB(s, i));
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_CB(s, i));
- nvc0->dirty |= NVC0_NEW_CONSTBUF;
+ nvc0->dirty_3d |= NVC0_NEW_3D_CONSTBUF;
}
nvc0->constbuf_dirty[s] |= 1 << i;
@@ -891,7 +893,7 @@ nvc0_set_blend_color(struct pipe_context *pipe,
struct nvc0_context *nvc0 = nvc0_context(pipe);
nvc0->blend_colour = *bcol;
- nvc0->dirty |= NVC0_NEW_BLEND_COLOUR;
+ nvc0->dirty_3d |= NVC0_NEW_3D_BLEND_COLOUR;
}
static void
@@ -901,7 +903,7 @@ nvc0_set_stencil_ref(struct pipe_context *pipe,
struct nvc0_context *nvc0 = nvc0_context(pipe);
nvc0->stencil_ref = *sr;
- nvc0->dirty |= NVC0_NEW_STENCIL_REF;
+ nvc0->dirty_3d |= NVC0_NEW_3D_STENCIL_REF;
}
static void
@@ -912,7 +914,7 @@ nvc0_set_clip_state(struct pipe_context *pipe,
memcpy(nvc0->clip.ucp, clip->ucp, sizeof(clip->ucp));
- nvc0->dirty |= NVC0_NEW_CLIP;
+ nvc0->dirty_3d |= NVC0_NEW_3D_CLIP;
}
static void
@@ -921,7 +923,7 @@ nvc0_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
struct nvc0_context *nvc0 = nvc0_context(pipe);
nvc0->sample_mask = sample_mask;
- nvc0->dirty |= NVC0_NEW_SAMPLE_MASK;
+ nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLE_MASK;
}
static void
@@ -931,7 +933,7 @@ nvc0_set_min_samples(struct pipe_context *pipe, unsigned min_samples)
if (nvc0->min_samples != min_samples) {
nvc0->min_samples = min_samples;
- nvc0->dirty |= NVC0_NEW_MIN_SAMPLES;
+ nvc0->dirty_3d |= NVC0_NEW_3D_MIN_SAMPLES;
}
}
@@ -940,23 +942,12 @@ nvc0_set_framebuffer_state(struct pipe_context *pipe,
const struct pipe_framebuffer_state *fb)
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
- unsigned i;
-
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB);
- for (i = 0; i < fb->nr_cbufs; ++i)
- pipe_surface_reference(&nvc0->framebuffer.cbufs[i], fb->cbufs[i]);
- for (; i < nvc0->framebuffer.nr_cbufs; ++i)
- pipe_surface_reference(&nvc0->framebuffer.cbufs[i], NULL);
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB);
- nvc0->framebuffer.nr_cbufs = fb->nr_cbufs;
+ util_copy_framebuffer_state(&nvc0->framebuffer, fb);
- nvc0->framebuffer.width = fb->width;
- nvc0->framebuffer.height = fb->height;
-
- pipe_surface_reference(&nvc0->framebuffer.zsbuf, fb->zsbuf);
-
- nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
+ nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER;
}
static void
@@ -966,7 +957,7 @@ nvc0_set_polygon_stipple(struct pipe_context *pipe,
struct nvc0_context *nvc0 = nvc0_context(pipe);
nvc0->stipple = *stipple;
- nvc0->dirty |= NVC0_NEW_STIPPLE;
+ nvc0->dirty_3d |= NVC0_NEW_3D_STIPPLE;
}
static void
@@ -984,7 +975,7 @@ nvc0_set_scissor_states(struct pipe_context *pipe,
continue;
nvc0->scissors[start_slot + i] = scissor[i];
nvc0->scissors_dirty |= 1 << (start_slot + i);
- nvc0->dirty |= NVC0_NEW_SCISSOR;
+ nvc0->dirty_3d |= NVC0_NEW_3D_SCISSOR;
}
}
@@ -1003,7 +994,7 @@ nvc0_set_viewport_states(struct pipe_context *pipe,
continue;
nvc0->viewports[start_slot + i] = vpt[i];
nvc0->viewports_dirty |= 1 << (start_slot + i);
- nvc0->dirty |= NVC0_NEW_VIEWPORT;
+ nvc0->dirty_3d |= NVC0_NEW_3D_VIEWPORT;
}
}
@@ -1017,7 +1008,7 @@ nvc0_set_tess_state(struct pipe_context *pipe,
memcpy(nvc0->default_tess_outer, default_tess_outer, 4 * sizeof(float));
memcpy(nvc0->default_tess_inner, default_tess_inner, 2 * sizeof(float));
- nvc0->dirty |= NVC0_NEW_TESSFACTOR;
+ nvc0->dirty_3d |= NVC0_NEW_3D_TESSFACTOR;
}
static void
@@ -1028,8 +1019,8 @@ nvc0_set_vertex_buffers(struct pipe_context *pipe,
struct nvc0_context *nvc0 = nvc0_context(pipe);
unsigned i;
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX);
- nvc0->dirty |= NVC0_NEW_ARRAYS;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_VTX);
+ nvc0->dirty_3d |= NVC0_NEW_3D_ARRAYS;
util_set_vertex_buffers_count(nvc0->vtxbuf, &nvc0->num_vtxbufs, vb,
start_slot, count);
@@ -1071,20 +1062,20 @@ nvc0_set_index_buffer(struct pipe_context *pipe,
struct nvc0_context *nvc0 = nvc0_context(pipe);
if (nvc0->idxbuf.buffer)
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_IDX);
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_IDX);
if (ib) {
pipe_resource_reference(&nvc0->idxbuf.buffer, ib->buffer);
nvc0->idxbuf.index_size = ib->index_size;
if (ib->buffer) {
nvc0->idxbuf.offset = ib->offset;
- nvc0->dirty |= NVC0_NEW_IDXBUF;
+ nvc0->dirty_3d |= NVC0_NEW_3D_IDXBUF;
} else {
nvc0->idxbuf.user_buffer = ib->user_buffer;
- nvc0->dirty &= ~NVC0_NEW_IDXBUF;
+ nvc0->dirty_3d &= ~NVC0_NEW_3D_IDXBUF;
}
} else {
- nvc0->dirty &= ~NVC0_NEW_IDXBUF;
+ nvc0->dirty_3d &= ~NVC0_NEW_3D_IDXBUF;
pipe_resource_reference(&nvc0->idxbuf.buffer, NULL);
}
}
@@ -1095,7 +1086,7 @@ nvc0_vertex_state_bind(struct pipe_context *pipe, void *hwcso)
struct nvc0_context *nvc0 = nvc0_context(pipe);
nvc0->vertex = hwcso;
- nvc0->dirty |= NVC0_NEW_VERTEX;
+ nvc0->dirty_3d |= NVC0_NEW_3D_VERTEX;
}
static struct pipe_stream_output_target *
@@ -1194,7 +1185,7 @@ nvc0_set_transform_feedback_targets(struct pipe_context *pipe,
nvc0->num_tfbbufs = num_targets;
if (nvc0->tfbbuf_dirty)
- nvc0->dirty |= NVC0_NEW_TFB_TARGETS;
+ nvc0->dirty_3d |= NVC0_NEW_3D_TFB_TARGETS;
}
static void
@@ -1223,7 +1214,7 @@ nvc0_bind_surfaces_range(struct nvc0_context *nvc0, const unsigned t,
nvc0->surfaces_dirty[t] |= mask;
if (t == 0)
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_SUF);
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_SUF);
else
nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);
}
@@ -1241,7 +1232,7 @@ nvc0_set_compute_resources(struct pipe_context *pipe,
static void
nvc0_set_shader_images(struct pipe_context *pipe, unsigned shader,
unsigned start_slot, unsigned count,
- struct pipe_image_view **views)
+ struct pipe_image_view *views)
{
}
@@ -1254,7 +1245,7 @@ nvc0_bind_buffers_range(struct nvc0_context *nvc0, const unsigned t,
const unsigned mask = ((1 << nr) - 1) << start;
unsigned i;
- assert(t < 5);
+ assert(t < 6);
if (pbuffers) {
for (i = start; i < end; ++i) {
@@ -1274,7 +1265,11 @@ nvc0_bind_buffers_range(struct nvc0_context *nvc0, const unsigned t,
}
nvc0->buffers_dirty[t] |= mask;
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_BUF);
+ if (t == 5)
+ nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_BUF);
+ else
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_BUF);
+
}
static void
@@ -1286,7 +1281,10 @@ nvc0_set_shader_buffers(struct pipe_context *pipe,
const unsigned s = nvc0_shader_stage(shader);
nvc0_bind_buffers_range(nvc0_context(pipe), s, start, nr, buffers);
- nvc0_context(pipe)->dirty |= NVC0_NEW_BUFFERS;
+ if (s == 5)
+ nvc0_context(pipe)->dirty_cp |= NVC0_NEW_CP_BUFFERS;
+ else
+ nvc0_context(pipe)->dirty_3d |= NVC0_NEW_3D_BUFFERS;
}
static inline void
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index e0d8ab01776..18e79e36b85 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -3,7 +3,6 @@
#include "util/u_math.h"
#include "nvc0/nvc0_context.h"
-#include "nv50/nv50_defs.xml.h"
#if 0
static void
@@ -77,7 +76,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1;
bool serialize = false;
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB);
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB);
BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
PUSH_DATA (push, (076543210 << 4) | fb->nr_cbufs);
@@ -142,7 +141,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
/* only register for writing, otherwise we'd always serialize here */
- BCTX_REFN(nvc0->bufctx_3d, FB, res, WR);
+ BCTX_REFN(nvc0->bufctx_3d, 3D_FB, res, WR);
}
if (fb->zsbuf) {
@@ -173,7 +172,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
- BCTX_REFN(nvc0->bufctx_3d, FB, &mt->base, WR);
+ BCTX_REFN(nvc0->bufctx_3d, 3D_FB, &mt->base, WR);
} else {
BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1);
PUSH_DATA (push, 0);
@@ -184,8 +183,8 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
ms = 1 << ms_mode;
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 1024);
- PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 10));
- PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 10));
+ PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (4 << 10));
+ PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (4 << 10));
BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms);
PUSH_DATA (push, 256 + 128);
for (i = 0; i < ms; i++) {
@@ -240,7 +239,7 @@ nvc0_validate_scissor(struct nvc0_context *nvc0)
int i;
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
- if (!(nvc0->dirty & NVC0_NEW_SCISSOR) &&
+ if (!(nvc0->dirty_3d & NVC0_NEW_3D_SCISSOR) &&
nvc0->rast->pipe.scissor == nvc0->state.scissor)
return;
@@ -318,8 +317,8 @@ nvc0_upload_uclip_planes(struct nvc0_context *nvc0, unsigned s)
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 1024);
- PUSH_DATAh(push, bo->offset + (5 << 16) + (s << 10));
- PUSH_DATA (push, bo->offset + (5 << 16) + (s << 10));
+ PUSH_DATAh(push, bo->offset + (6 << 16) + (s << 10));
+ PUSH_DATA (push, bo->offset + (6 << 16) + (s << 10));
BEGIN_1IC0(push, NVC0_3D(CB_POS), PIPE_MAX_CLIP_PLANES * 4 + 1);
PUSH_DATA (push, 256);
PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4);
@@ -368,7 +367,7 @@ nvc0_validate_clip(struct nvc0_context *nvc0)
if (clip_enable && vp->vp.num_ucps < PIPE_MAX_CLIP_PLANES)
nvc0_check_program_ucps(nvc0, vp, clip_enable);
- if (nvc0->dirty & (NVC0_NEW_CLIP | (NVC0_NEW_VERTPROG << stage)))
+ if (nvc0->dirty_3d & (NVC0_NEW_3D_CLIP | (NVC0_NEW_3D_VERTPROG << stage)))
if (vp->vp.num_ucps > 0 && vp->vp.num_ucps <= PIPE_MAX_CLIP_PLANES)
nvc0_upload_uclip_planes(nvc0, stage);
@@ -455,7 +454,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
PUSH_DATA (push, (i << 4) | 1);
- BCTX_REFN(nvc0->bufctx_3d, CB(s, i), res, RD);
+ BCTX_REFN(nvc0->bufctx_3d, 3D_CB(s, i), res, RD);
nvc0->cb_dirty = 1; /* Force cache flush for UBO. */
res->cb_bindings[s] |= 1 << i;
@@ -468,6 +467,11 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
}
}
}
+
+ /* Invalidate all COMPUTE constbufs because they are aliased with 3D. */
+ nvc0->dirty_cp |= NVC0_NEW_CP_CONSTBUF;
+ nvc0->constbuf_dirty[5] |= nvc0->constbuf_valid[5];
+ nvc0->state.uniform_buffer_bound[5] = 0;
}
static void
@@ -479,8 +483,8 @@ nvc0_validate_buffers(struct nvc0_context *nvc0)
for (s = 0; s < 5; s++) {
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 1024);
- PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (s << 10));
- PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (s << 10));
+ PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10));
+ PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10));
BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);
PUSH_DATA (push, 512);
for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
@@ -491,7 +495,7 @@ nvc0_validate_buffers(struct nvc0_context *nvc0)
PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset);
PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
PUSH_DATA (push, 0);
- BCTX_REFN(nvc0->bufctx_3d, BUF, res, RDWR);
+ BCTX_REFN(nvc0->bufctx_3d, 3D_BUF, res, RDWR);
} else {
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
@@ -536,6 +540,25 @@ nvc0_validate_min_samples(struct nvc0_context *nvc0)
IMMED_NVC0(push, NVC0_3D(SAMPLE_SHADING), samples);
}
+static void
+nvc0_validate_driverconst(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_screen *screen = nvc0->screen;
+ int i;
+
+ for (i = 0; i < 5; ++i) {
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+ PUSH_DATA (push, 1024);
+ PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (i << 10));
+ PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (i << 10));
+ BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1);
+ PUSH_DATA (push, (15 << 4) | 1);
+ }
+
+ nvc0->dirty_cp |= NVC0_NEW_CP_DRIVERCONST;
+}
+
void
nvc0_validate_global_residents(struct nvc0_context *nvc0,
struct nouveau_bufctx *bctx, int bin)
@@ -629,35 +652,37 @@ nvc0_switch_pipe_context(struct nvc0_context *ctx_to)
else
ctx_to->state = ctx_to->screen->save_state;
- ctx_to->dirty = ~0;
+ ctx_to->dirty_3d = ~0;
+ ctx_to->dirty_cp = ~0;
ctx_to->viewports_dirty = ~0;
ctx_to->scissors_dirty = ~0;
- for (s = 0; s < 5; ++s) {
+ for (s = 0; s < 6; ++s) {
ctx_to->samplers_dirty[s] = ~0;
ctx_to->textures_dirty[s] = ~0;
ctx_to->constbuf_dirty[s] = (1 << NVC0_MAX_PIPE_CONSTBUFS) - 1;
+ ctx_to->buffers_dirty[s] = ~0;
}
/* Reset tfb as the shader that owns it may have been deleted. */
ctx_to->state.tfb = NULL;
if (!ctx_to->vertex)
- ctx_to->dirty &= ~(NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS);
+ ctx_to->dirty_3d &= ~(NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS);
if (!ctx_to->idxbuf.buffer)
- ctx_to->dirty &= ~NVC0_NEW_IDXBUF;
+ ctx_to->dirty_3d &= ~NVC0_NEW_3D_IDXBUF;
if (!ctx_to->vertprog)
- ctx_to->dirty &= ~NVC0_NEW_VERTPROG;
+ ctx_to->dirty_3d &= ~NVC0_NEW_3D_VERTPROG;
if (!ctx_to->fragprog)
- ctx_to->dirty &= ~NVC0_NEW_FRAGPROG;
+ ctx_to->dirty_3d &= ~NVC0_NEW_3D_FRAGPROG;
if (!ctx_to->blend)
- ctx_to->dirty &= ~NVC0_NEW_BLEND;
+ ctx_to->dirty_3d &= ~NVC0_NEW_3D_BLEND;
if (!ctx_to->rast)
- ctx_to->dirty &= ~(NVC0_NEW_RASTERIZER | NVC0_NEW_SCISSOR);
+ ctx_to->dirty_3d &= ~(NVC0_NEW_3D_RASTERIZER | NVC0_NEW_3D_SCISSOR);
if (!ctx_to->zsa)
- ctx_to->dirty &= ~NVC0_NEW_ZSA;
+ ctx_to->dirty_3d &= ~NVC0_NEW_3D_ZSA;
ctx_to->screen->cur_ctx = ctx_to;
}
@@ -666,40 +691,41 @@ static struct state_validate {
void (*func)(struct nvc0_context *);
uint32_t states;
} validate_list[] = {
- { nvc0_validate_fb, NVC0_NEW_FRAMEBUFFER },
- { nvc0_validate_blend, NVC0_NEW_BLEND },
- { nvc0_validate_zsa, NVC0_NEW_ZSA },
- { nvc0_validate_sample_mask, NVC0_NEW_SAMPLE_MASK },
- { nvc0_validate_rasterizer, NVC0_NEW_RASTERIZER },
- { nvc0_validate_blend_colour, NVC0_NEW_BLEND_COLOUR },
- { nvc0_validate_stencil_ref, NVC0_NEW_STENCIL_REF },
- { nvc0_validate_stipple, NVC0_NEW_STIPPLE },
- { nvc0_validate_scissor, NVC0_NEW_SCISSOR | NVC0_NEW_RASTERIZER },
- { nvc0_validate_viewport, NVC0_NEW_VIEWPORT },
- { nvc0_vertprog_validate, NVC0_NEW_VERTPROG },
- { nvc0_tctlprog_validate, NVC0_NEW_TCTLPROG },
- { nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG },
- { nvc0_validate_tess_state, NVC0_NEW_TESSFACTOR },
- { nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG },
- { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG | NVC0_NEW_RASTERIZER },
- { nvc0_validate_derived_1, NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA |
- NVC0_NEW_RASTERIZER },
- { nvc0_validate_derived_2, NVC0_NEW_ZSA | NVC0_NEW_FRAMEBUFFER },
- { nvc0_validate_derived_3, NVC0_NEW_BLEND | NVC0_NEW_FRAMEBUFFER },
- { nvc0_validate_clip, NVC0_NEW_CLIP | NVC0_NEW_RASTERIZER |
- NVC0_NEW_VERTPROG |
- NVC0_NEW_TEVLPROG |
- NVC0_NEW_GMTYPROG },
- { nvc0_constbufs_validate, NVC0_NEW_CONSTBUF },
- { nvc0_validate_textures, NVC0_NEW_TEXTURES },
- { nvc0_validate_samplers, NVC0_NEW_SAMPLERS },
- { nve4_set_tex_handles, NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS },
- { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS },
- { nvc0_validate_surfaces, NVC0_NEW_SURFACES },
- { nvc0_validate_buffers, NVC0_NEW_BUFFERS },
- { nvc0_idxbuf_validate, NVC0_NEW_IDXBUF },
- { nvc0_tfb_validate, NVC0_NEW_TFB_TARGETS | NVC0_NEW_GMTYPROG },
- { nvc0_validate_min_samples, NVC0_NEW_MIN_SAMPLES },
+ { nvc0_validate_fb, NVC0_NEW_3D_FRAMEBUFFER },
+ { nvc0_validate_blend, NVC0_NEW_3D_BLEND },
+ { nvc0_validate_zsa, NVC0_NEW_3D_ZSA },
+ { nvc0_validate_sample_mask, NVC0_NEW_3D_SAMPLE_MASK },
+ { nvc0_validate_rasterizer, NVC0_NEW_3D_RASTERIZER },
+ { nvc0_validate_blend_colour, NVC0_NEW_3D_BLEND_COLOUR },
+ { nvc0_validate_stencil_ref, NVC0_NEW_3D_STENCIL_REF },
+ { nvc0_validate_stipple, NVC0_NEW_3D_STIPPLE },
+ { nvc0_validate_scissor, NVC0_NEW_3D_SCISSOR | NVC0_NEW_3D_RASTERIZER },
+ { nvc0_validate_viewport, NVC0_NEW_3D_VIEWPORT },
+ { nvc0_vertprog_validate, NVC0_NEW_3D_VERTPROG },
+ { nvc0_tctlprog_validate, NVC0_NEW_3D_TCTLPROG },
+ { nvc0_tevlprog_validate, NVC0_NEW_3D_TEVLPROG },
+ { nvc0_validate_tess_state, NVC0_NEW_3D_TESSFACTOR },
+ { nvc0_gmtyprog_validate, NVC0_NEW_3D_GMTYPROG },
+ { nvc0_fragprog_validate, NVC0_NEW_3D_FRAGPROG | NVC0_NEW_3D_RASTERIZER },
+ { nvc0_validate_derived_1, NVC0_NEW_3D_FRAGPROG | NVC0_NEW_3D_ZSA |
+ NVC0_NEW_3D_RASTERIZER },
+ { nvc0_validate_derived_2, NVC0_NEW_3D_ZSA | NVC0_NEW_3D_FRAMEBUFFER },
+ { nvc0_validate_derived_3, NVC0_NEW_3D_BLEND | NVC0_NEW_3D_FRAMEBUFFER },
+ { nvc0_validate_clip, NVC0_NEW_3D_CLIP | NVC0_NEW_3D_RASTERIZER |
+ NVC0_NEW_3D_VERTPROG |
+ NVC0_NEW_3D_TEVLPROG |
+ NVC0_NEW_3D_GMTYPROG },
+ { nvc0_constbufs_validate, NVC0_NEW_3D_CONSTBUF },
+ { nvc0_validate_textures, NVC0_NEW_3D_TEXTURES },
+ { nvc0_validate_samplers, NVC0_NEW_3D_SAMPLERS },
+ { nve4_set_tex_handles, NVC0_NEW_3D_TEXTURES | NVC0_NEW_3D_SAMPLERS },
+ { nvc0_vertex_arrays_validate, NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS },
+ { nvc0_validate_surfaces, NVC0_NEW_3D_SURFACES },
+ { nvc0_validate_buffers, NVC0_NEW_3D_BUFFERS },
+ { nvc0_idxbuf_validate, NVC0_NEW_3D_IDXBUF },
+ { nvc0_tfb_validate, NVC0_NEW_3D_TFB_TARGETS | NVC0_NEW_3D_GMTYPROG },
+ { nvc0_validate_min_samples, NVC0_NEW_3D_MIN_SAMPLES },
+ { nvc0_validate_driverconst, NVC0_NEW_3D_DRIVERCONST },
};
bool
@@ -712,7 +738,7 @@ nvc0_state_validate(struct nvc0_context *nvc0, uint32_t mask)
if (nvc0->screen->cur_ctx != nvc0)
nvc0_switch_pipe_context(nvc0);
- state_mask = nvc0->dirty & mask;
+ state_mask = nvc0->dirty_3d & mask;
if (state_mask) {
for (i = 0; i < ARRAY_SIZE(validate_list); ++i) {
@@ -721,7 +747,7 @@ nvc0_state_validate(struct nvc0_context *nvc0, uint32_t mask)
if (state_mask & validate->states)
validate->func(nvc0);
}
- nvc0->dirty &= ~state_mask;
+ nvc0->dirty_3d &= ~state_mask;
nvc0_bufctx_fence(nvc0, nvc0->bufctx_3d, false);
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index e3843ca1bf1..49577969d3d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -34,8 +34,8 @@
#include "nvc0/nvc0_context.h"
#include "nvc0/nvc0_resource.h"
-#include "nv50/nv50_defs.xml.h"
-#include "nv50/nv50_texture.xml.h"
+#include "nv50/g80_defs.xml.h"
+#include "nv50/g80_texture.xml.h"
/* these are used in nv50_blit.h */
#define NV50_ENG2D_SUPPORTED_FORMATS 0xff9ccfe1cce3ccc9ULL
@@ -54,7 +54,7 @@ nvc0_2d_format(enum pipe_format format, bool dst, bool dst_src_equal)
/* A8_UNORM is treated as I8_UNORM as far as the 2D engine is concerned. */
if (!dst && unlikely(format == PIPE_FORMAT_I8_UNORM) && !dst_src_equal)
- return NV50_SURFACE_FORMAT_A8_UNORM;
+ return G80_SURFACE_FORMAT_A8_UNORM;
/* Hardware values for color formats range from 0xc0 to 0xff,
* but the 2D engine doesn't support all of them.
@@ -65,15 +65,15 @@ nvc0_2d_format(enum pipe_format format, bool dst, bool dst_src_equal)
switch (util_format_get_blocksize(format)) {
case 1:
- return NV50_SURFACE_FORMAT_R8_UNORM;
+ return G80_SURFACE_FORMAT_R8_UNORM;
case 2:
- return NV50_SURFACE_FORMAT_RG8_UNORM;
+ return G80_SURFACE_FORMAT_RG8_UNORM;
case 4:
- return NV50_SURFACE_FORMAT_BGRA8_UNORM;
+ return G80_SURFACE_FORMAT_BGRA8_UNORM;
case 8:
- return NV50_SURFACE_FORMAT_RGBA16_UNORM;
+ return G80_SURFACE_FORMAT_RGBA16_UNORM;
case 16:
- return NV50_SURFACE_FORMAT_RGBA32_FLOAT;
+ return G80_SURFACE_FORMAT_RGBA32_FLOAT;
default:
assert(0);
return 0;
@@ -353,7 +353,7 @@ nvc0_clear_render_target(struct pipe_context *pipe,
IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
- nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
+ nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER;
}
static void
@@ -609,7 +609,7 @@ nvc0_clear_buffer(struct pipe_context *pipe,
data, data_size);
}
- nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
+ nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER;
}
static void
@@ -678,7 +678,7 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe,
IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
- nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
+ nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER;
}
void
@@ -693,7 +693,7 @@ nvc0_clear(struct pipe_context *pipe, unsigned buffers,
uint32_t mode = 0;
/* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */
- if (!nvc0_state_validate(nvc0, NVC0_NEW_FRAMEBUFFER))
+ if (!nvc0_state_validate(nvc0, NVC0_NEW_3D_FRAMEBUFFER))
return;
if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
@@ -793,7 +793,7 @@ struct nvc0_blitctx
struct pipe_sampler_view *texture[2];
struct nv50_tsc_entry *sampler[2];
unsigned min_samples;
- uint32_t dirty;
+ uint32_t dirty_3d;
} saved;
struct nvc0_rasterizer_stateobj rast;
};
@@ -871,12 +871,14 @@ nvc0_blitter_make_sampler(struct nvc0_blitter *blit)
blit->sampler[0].id = -1;
- blit->sampler[0].tsc[0] = NV50_TSC_0_SRGB_CONVERSION_ALLOWED |
- (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPS__SHIFT) |
- (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPT__SHIFT) |
- (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPR__SHIFT);
+ blit->sampler[0].tsc[0] = G80_TSC_0_SRGB_CONVERSION |
+ (G80_TSC_WRAP_CLAMP_TO_EDGE << G80_TSC_0_ADDRESS_U__SHIFT) |
+ (G80_TSC_WRAP_CLAMP_TO_EDGE << G80_TSC_0_ADDRESS_V__SHIFT) |
+ (G80_TSC_WRAP_CLAMP_TO_EDGE << G80_TSC_0_ADDRESS_P__SHIFT);
blit->sampler[0].tsc[1] =
- NV50_TSC_1_MAGF_NEAREST | NV50_TSC_1_MINF_NEAREST | NV50_TSC_1_MIPF_NONE;
+ G80_TSC_1_MAG_FILTER_NEAREST |
+ G80_TSC_1_MIN_FILTER_NEAREST |
+ G80_TSC_1_MIP_FILTER_NONE;
/* clamp to edge, min/max lod = 0, bilinear filtering */
@@ -884,7 +886,9 @@ nvc0_blitter_make_sampler(struct nvc0_blitter *blit)
blit->sampler[1].tsc[0] = blit->sampler[0].tsc[0];
blit->sampler[1].tsc[1] =
- NV50_TSC_1_MAGF_LINEAR | NV50_TSC_1_MINF_LINEAR | NV50_TSC_1_MIPF_NONE;
+ G80_TSC_1_MAG_FILTER_LINEAR |
+ G80_TSC_1_MIN_FILTER_LINEAR |
+ G80_TSC_1_MIP_FILTER_NONE;
}
static void
@@ -1081,19 +1085,19 @@ nvc0_blitctx_pre_blit(struct nvc0_blitctx *ctx)
nvc0->min_samples = 1;
- ctx->saved.dirty = nvc0->dirty;
+ ctx->saved.dirty_3d = nvc0->dirty_3d;
nvc0->textures_dirty[4] |= 3;
nvc0->samplers_dirty[4] |= 3;
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB);
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 0));
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 1));
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB);
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(4, 0));
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(4, 1));
- nvc0->dirty = NVC0_NEW_FRAMEBUFFER | NVC0_NEW_MIN_SAMPLES |
- NVC0_NEW_VERTPROG | NVC0_NEW_FRAGPROG |
- NVC0_NEW_TCTLPROG | NVC0_NEW_TEVLPROG | NVC0_NEW_GMTYPROG |
- NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS;
+ nvc0->dirty_3d = NVC0_NEW_3D_FRAMEBUFFER | NVC0_NEW_3D_MIN_SAMPLES |
+ NVC0_NEW_3D_VERTPROG | NVC0_NEW_3D_FRAGPROG |
+ NVC0_NEW_3D_TCTLPROG | NVC0_NEW_3D_TEVLPROG | NVC0_NEW_3D_GMTYPROG |
+ NVC0_NEW_3D_TEXTURES | NVC0_NEW_3D_SAMPLERS;
}
static void
@@ -1141,20 +1145,20 @@ nvc0_blitctx_post_blit(struct nvc0_blitctx *blit)
nvc0->base.pipe.render_condition(&nvc0->base.pipe, nvc0->cond_query,
nvc0->cond_cond, nvc0->cond_mode);
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX_TMP);
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB);
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 0));
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 1));
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_VTX_TMP);
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB);
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(4, 0));
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(4, 1));
nouveau_scratch_done(&nvc0->base);
- nvc0->dirty = blit->saved.dirty |
- (NVC0_NEW_FRAMEBUFFER | NVC0_NEW_SCISSOR | NVC0_NEW_SAMPLE_MASK |
- NVC0_NEW_RASTERIZER | NVC0_NEW_ZSA | NVC0_NEW_BLEND |
- NVC0_NEW_VIEWPORT |
- NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS |
- NVC0_NEW_VERTPROG | NVC0_NEW_FRAGPROG |
- NVC0_NEW_TCTLPROG | NVC0_NEW_TEVLPROG | NVC0_NEW_GMTYPROG |
- NVC0_NEW_TFB_TARGETS | NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS);
+ nvc0->dirty_3d = blit->saved.dirty_3d |
+ (NVC0_NEW_3D_FRAMEBUFFER | NVC0_NEW_3D_SCISSOR | NVC0_NEW_3D_SAMPLE_MASK |
+ NVC0_NEW_3D_RASTERIZER | NVC0_NEW_3D_ZSA | NVC0_NEW_3D_BLEND |
+ NVC0_NEW_3D_VIEWPORT |
+ NVC0_NEW_3D_TEXTURES | NVC0_NEW_3D_SAMPLERS |
+ NVC0_NEW_3D_VERTPROG | NVC0_NEW_3D_FRAGPROG |
+ NVC0_NEW_3D_TCTLPROG | NVC0_NEW_3D_TEVLPROG | NVC0_NEW_3D_GMTYPROG |
+ NVC0_NEW_3D_TFB_TARGETS | NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS);
nvc0->scissors_dirty |= 1;
nvc0->viewports_dirty |= 1;
@@ -1263,7 +1267,8 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
return;
}
- BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD, vtxbuf_bo);
+ BCTX_REFN_bo(nvc0->bufctx_3d, 3D_VTX_TMP,
+ NOUVEAU_BO_GART | NOUVEAU_BO_RD, vtxbuf_bo);
nouveau_pushbuf_validate(push);
BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(0)), 4);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
index 7223f5aecfb..53332400a4f 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
@@ -22,35 +22,29 @@
#include "nvc0/nvc0_context.h"
#include "nvc0/nvc0_resource.h"
-#include "nv50/nv50_texture.xml.h"
-#include "nv50/nv50_defs.xml.h"
+#include "nvc0/gm107_texture.xml.h"
+#include "nvc0/nvc0_compute.xml.h"
+#include "nv50/g80_texture.xml.h"
+#include "nv50/g80_defs.xml.h"
#include "util/u_format.h"
#define NVE4_TIC_ENTRY_INVALID 0x000fffff
#define NVE4_TSC_ENTRY_INVALID 0xfff00000
-#define NV50_TIC_0_SWIZZLE__MASK \
- (NV50_TIC_0_MAPA__MASK | NV50_TIC_0_MAPB__MASK | \
- NV50_TIC_0_MAPG__MASK | NV50_TIC_0_MAPR__MASK)
-
static inline uint32_t
-nv50_tic_swizzle(uint32_t tc, unsigned swz, bool tex_int)
+nv50_tic_swizzle(const struct nvc0_format *fmt, unsigned swz, bool tex_int)
{
switch (swz) {
- case PIPE_SWIZZLE_RED:
- return (tc & NV50_TIC_0_MAPR__MASK) >> NV50_TIC_0_MAPR__SHIFT;
- case PIPE_SWIZZLE_GREEN:
- return (tc & NV50_TIC_0_MAPG__MASK) >> NV50_TIC_0_MAPG__SHIFT;
- case PIPE_SWIZZLE_BLUE:
- return (tc & NV50_TIC_0_MAPB__MASK) >> NV50_TIC_0_MAPB__SHIFT;
- case PIPE_SWIZZLE_ALPHA:
- return (tc & NV50_TIC_0_MAPA__MASK) >> NV50_TIC_0_MAPA__SHIFT;
+ case PIPE_SWIZZLE_RED : return fmt->tic.src_x;
+ case PIPE_SWIZZLE_GREEN: return fmt->tic.src_y;
+ case PIPE_SWIZZLE_BLUE : return fmt->tic.src_z;
+ case PIPE_SWIZZLE_ALPHA: return fmt->tic.src_w;
case PIPE_SWIZZLE_ONE:
- return tex_int ? NV50_TIC_MAP_ONE_INT : NV50_TIC_MAP_ONE_FLOAT;
+ return tex_int ? G80_TIC_SOURCE_ONE_INT : G80_TIC_SOURCE_ONE_FLOAT;
case PIPE_SWIZZLE_ZERO:
default:
- return NV50_TIC_MAP_ZERO;
+ return G80_TIC_SOURCE_ZERO;
}
}
@@ -67,14 +61,15 @@ nvc0_create_sampler_view(struct pipe_context *pipe,
return nvc0_create_texture_view(pipe, res, templ, flags, templ->target);
}
-struct pipe_sampler_view *
-nvc0_create_texture_view(struct pipe_context *pipe,
- struct pipe_resource *texture,
- const struct pipe_sampler_view *templ,
- uint32_t flags,
- enum pipe_texture_target target)
+static struct pipe_sampler_view *
+gm107_create_texture_view(struct pipe_context *pipe,
+ struct pipe_resource *texture,
+ const struct pipe_sampler_view *templ,
+ uint32_t flags,
+ enum pipe_texture_target target)
{
const struct util_format_description *desc;
+ const struct nvc0_format *fmt;
uint64_t address;
uint32_t *tic;
uint32_t swz[4];
@@ -101,45 +96,224 @@ nvc0_create_texture_view(struct pipe_context *pipe,
tic = &view->tic[0];
desc = util_format_description(view->pipe.format);
+ tex_int = util_format_is_pure_integer(view->pipe.format);
+
+ fmt = &nvc0_format_table[view->pipe.format];
+ swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int);
+ swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int);
+ swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int);
+ swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int);
+
+ tic[0] = fmt->tic.format << GM107_TIC2_0_COMPONENTS_SIZES__SHIFT;
+ tic[0] |= fmt->tic.type_r << GM107_TIC2_0_R_DATA_TYPE__SHIFT;
+ tic[0] |= fmt->tic.type_g << GM107_TIC2_0_G_DATA_TYPE__SHIFT;
+ tic[0] |= fmt->tic.type_b << GM107_TIC2_0_B_DATA_TYPE__SHIFT;
+ tic[0] |= fmt->tic.type_a << GM107_TIC2_0_A_DATA_TYPE__SHIFT;
+ tic[0] |= swz[0] << GM107_TIC2_0_X_SOURCE__SHIFT;
+ tic[0] |= swz[1] << GM107_TIC2_0_Y_SOURCE__SHIFT;
+ tic[0] |= swz[2] << GM107_TIC2_0_Z_SOURCE__SHIFT;
+ tic[0] |= swz[3] << GM107_TIC2_0_W_SOURCE__SHIFT;
+
+ address = mt->base.address;
+
+ tic[3] = GM107_TIC2_3_LOD_ANISO_QUALITY_2;
+ tic[4] = GM107_TIC2_4_SECTOR_PROMOTION_PROMOTE_TO_2_V;
+ tic[4] |= GM107_TIC2_4_BORDER_SIZE_SAMPLER_COLOR;
+
+ if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
+ tic[4] |= GM107_TIC2_4_SRGB_CONVERSION;
+
+ if (!(flags & NV50_TEXVIEW_SCALED_COORDS))
+ tic[5] = GM107_TIC2_5_NORMALIZED_COORDS;
+ else
+ tic[5] = 0;
+
+ /* check for linear storage type */
+ if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) {
+ if (texture->target == PIPE_BUFFER) {
+ assert(!(tic[5] & GM107_TIC2_5_NORMALIZED_COORDS));
+ width = view->pipe.u.buf.last_element - view->pipe.u.buf.first_element;
+ address +=
+ view->pipe.u.buf.first_element * desc->block.bits / 8;
+ tic[2] = GM107_TIC2_2_HEADER_VERSION_ONE_D_BUFFER;
+ tic[3] |= width >> 16;
+ tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D_BUFFER;
+ tic[4] |= width & 0xffff;
+ } else {
+ assert(!(mt->level[0].pitch & 0x1f));
+ /* must be 2D texture without mip maps */
+ tic[2] = GM107_TIC2_2_HEADER_VERSION_PITCH;
+ tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D_NO_MIPMAP;
+ tic[3] |= mt->level[0].pitch >> 5;
+ tic[4] |= mt->base.base.width0 - 1;
+ tic[5] |= 0 << GM107_TIC2_5_DEPTH_MINUS_ONE__SHIFT;
+ tic[5] |= mt->base.base.height0 - 1;
+ }
+ tic[1] = address;
+ tic[2] |= address >> 32;
+ tic[6] = 0;
+ tic[7] = 0;
+ return &view->pipe;
+ }
+
+ tic[2] = GM107_TIC2_2_HEADER_VERSION_BLOCKLINEAR;
+ tic[3] |=
+ ((mt->level[0].tile_mode & 0x0f0) >> 4 << 3) |
+ ((mt->level[0].tile_mode & 0xf00) >> 8 << 6);
+
+ depth = MAX2(mt->base.base.array_size, mt->base.base.depth0);
- tic[0] = nvc0_format_table[view->pipe.format].tic;
+ if (mt->base.base.array_size > 1) {
+ /* there doesn't seem to be a base layer field in TIC */
+ address += view->pipe.u.tex.first_layer * mt->layer_stride;
+ depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;
+ }
+ tic[1] = address;
+ tic[2] |= address >> 32;
+
+ switch (target) {
+ case PIPE_TEXTURE_1D:
+ tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D;
+ break;
+ case PIPE_TEXTURE_2D:
+ tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D;
+ break;
+ case PIPE_TEXTURE_RECT:
+ tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D;
+ break;
+ case PIPE_TEXTURE_3D:
+ tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_THREE_D;
+ break;
+ case PIPE_TEXTURE_CUBE:
+ depth /= 6;
+ tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_CUBEMAP;
+ break;
+ case PIPE_TEXTURE_1D_ARRAY:
+ tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D_ARRAY;
+ break;
+ case PIPE_TEXTURE_2D_ARRAY:
+ tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D_ARRAY;
+ break;
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ depth /= 6;
+ tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_CUBE_ARRAY;
+ break;
+ default:
+ unreachable("unexpected/invalid texture target");
+ }
+
+ tic[3] |= (flags & NV50_TEXVIEW_FILTER_MSAA8) ?
+ GM107_TIC2_3_USE_HEADER_OPT_CONTROL :
+ GM107_TIC2_3_LOD_ANISO_QUALITY_HIGH |
+ GM107_TIC2_3_LOD_ISO_QUALITY_HIGH;
+
+ if (flags & NV50_TEXVIEW_ACCESS_RESOLVE) {
+ width = mt->base.base.width0 << mt->ms_x;
+ height = mt->base.base.height0 << mt->ms_y;
+ } else {
+ width = mt->base.base.width0;
+ height = mt->base.base.height0;
+ }
+
+ tic[4] |= width - 1;
+
+ tic[5] |= (height - 1) & 0xffff;
+ tic[5] |= (depth - 1) << GM107_TIC2_5_DEPTH_MINUS_ONE__SHIFT;
+ tic[3] |= mt->base.base.last_level << GM107_TIC2_3_MAX_MIP_LEVEL__SHIFT;
+
+ /* sampling points: (?) */
+ if ((flags & NV50_TEXVIEW_ACCESS_RESOLVE) && mt->ms_x > 1) {
+ tic[6] = GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER_CONST_TWO;
+ tic[6] |= GM107_TIC2_6_MAX_ANISOTROPY_2_TO_1;
+ } else {
+ tic[6] = GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC_TWO;
+ tic[6] |= GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC_ONE;
+ }
+
+ tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
+ tic[7] |= mt->ms_mode << GM107_TIC2_7_MULTI_SAMPLE_COUNT__SHIFT;
+
+ return &view->pipe;
+}
+
+static struct pipe_sampler_view *
+gf100_create_texture_view(struct pipe_context *pipe,
+ struct pipe_resource *texture,
+ const struct pipe_sampler_view *templ,
+ uint32_t flags,
+ enum pipe_texture_target target)
+{
+ const struct util_format_description *desc;
+ const struct nvc0_format *fmt;
+ uint64_t address;
+ uint32_t *tic;
+ uint32_t swz[4];
+ uint32_t width, height;
+ uint32_t depth;
+ struct nv50_tic_entry *view;
+ struct nv50_miptree *mt;
+ bool tex_int;
+
+ view = MALLOC_STRUCT(nv50_tic_entry);
+ if (!view)
+ return NULL;
+ mt = nv50_miptree(texture);
+
+ view->pipe = *templ;
+ view->pipe.reference.count = 1;
+ view->pipe.texture = NULL;
+ view->pipe.context = pipe;
+
+ view->id = -1;
+
+ pipe_resource_reference(&view->pipe.texture, texture);
+
+ tic = &view->tic[0];
+
+ desc = util_format_description(view->pipe.format);
+
+ fmt = &nvc0_format_table[view->pipe.format];
tex_int = util_format_is_pure_integer(view->pipe.format);
- swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r, tex_int);
- swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g, tex_int);
- swz[2] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_b, tex_int);
- swz[3] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_a, tex_int);
- tic[0] = (tic[0] & ~NV50_TIC_0_SWIZZLE__MASK) |
- (swz[0] << NV50_TIC_0_MAPR__SHIFT) |
- (swz[1] << NV50_TIC_0_MAPG__SHIFT) |
- (swz[2] << NV50_TIC_0_MAPB__SHIFT) |
- (swz[3] << NV50_TIC_0_MAPA__SHIFT);
+ swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int);
+ swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int);
+ swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int);
+ swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int);
+ tic[0] = (fmt->tic.format << G80_TIC_0_COMPONENTS_SIZES__SHIFT) |
+ (fmt->tic.type_r << G80_TIC_0_R_DATA_TYPE__SHIFT) |
+ (fmt->tic.type_g << G80_TIC_0_G_DATA_TYPE__SHIFT) |
+ (fmt->tic.type_b << G80_TIC_0_B_DATA_TYPE__SHIFT) |
+ (fmt->tic.type_a << G80_TIC_0_A_DATA_TYPE__SHIFT) |
+ (swz[0] << G80_TIC_0_X_SOURCE__SHIFT) |
+ (swz[1] << G80_TIC_0_Y_SOURCE__SHIFT) |
+ (swz[2] << G80_TIC_0_Z_SOURCE__SHIFT) |
+ (swz[3] << G80_TIC_0_W_SOURCE__SHIFT);
address = mt->base.address;
- tic[2] = 0x10001000 | NV50_TIC_2_NO_BORDER;
+ tic[2] = 0x10001000 | G80_TIC_2_BORDER_SOURCE_COLOR;
if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
- tic[2] |= NV50_TIC_2_COLORSPACE_SRGB;
+ tic[2] |= G80_TIC_2_SRGB_CONVERSION;
if (!(flags & NV50_TEXVIEW_SCALED_COORDS))
- tic[2] |= NV50_TIC_2_NORMALIZED_COORDS;
+ tic[2] |= G80_TIC_2_NORMALIZED_COORDS;
/* check for linear storage type */
if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) {
if (texture->target == PIPE_BUFFER) {
- assert(!(tic[2] & NV50_TIC_2_NORMALIZED_COORDS));
+ assert(!(tic[2] & G80_TIC_2_NORMALIZED_COORDS));
address +=
view->pipe.u.buf.first_element * desc->block.bits / 8;
- tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_BUFFER;
+ tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_ONE_D_BUFFER;
tic[3] = 0;
tic[4] = /* width */
view->pipe.u.buf.last_element - view->pipe.u.buf.first_element + 1;
tic[5] = 0;
} else {
/* must be 2D texture without mip maps */
- tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_RECT;
+ tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_TWO_D_NO_MIPMAP;
tic[3] = mt->level[0].pitch;
tic[4] = mt->base.base.width0;
tic[5] = (1 << 16) | mt->base.base.height0;
@@ -167,30 +341,30 @@ nvc0_create_texture_view(struct pipe_context *pipe,
switch (target) {
case PIPE_TEXTURE_1D:
- tic[2] |= NV50_TIC_2_TARGET_1D;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D;
break;
case PIPE_TEXTURE_2D:
- tic[2] |= NV50_TIC_2_TARGET_2D;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D;
break;
case PIPE_TEXTURE_RECT:
- tic[2] |= NV50_TIC_2_TARGET_2D;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D;
break;
case PIPE_TEXTURE_3D:
- tic[2] |= NV50_TIC_2_TARGET_3D;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_THREE_D;
break;
case PIPE_TEXTURE_CUBE:
depth /= 6;
- tic[2] |= NV50_TIC_2_TARGET_CUBE;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBEMAP;
break;
case PIPE_TEXTURE_1D_ARRAY:
- tic[2] |= NV50_TIC_2_TARGET_1D_ARRAY;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D_ARRAY;
break;
case PIPE_TEXTURE_2D_ARRAY:
- tic[2] |= NV50_TIC_2_TARGET_2D_ARRAY;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D_ARRAY;
break;
case PIPE_TEXTURE_CUBE_ARRAY:
depth /= 6;
- tic[2] |= NV50_TIC_2_TARGET_CUBE_ARRAY;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBE_ARRAY;
break;
default:
unreachable("unexpected/invalid texture target");
@@ -224,6 +398,18 @@ nvc0_create_texture_view(struct pipe_context *pipe,
return &view->pipe;
}
+struct pipe_sampler_view *
+nvc0_create_texture_view(struct pipe_context *pipe,
+ struct pipe_resource *texture,
+ const struct pipe_sampler_view *templ,
+ uint32_t flags,
+ enum pipe_texture_target target)
+{
+ if (nvc0_context(pipe)->screen->tic.maxwell)
+ return gm107_create_texture_view(pipe, texture, templ, flags, target);
+ return gf100_create_texture_view(pipe, texture, templ, flags, target);
+}
+
static void
nvc0_update_tic(struct nvc0_context *nvc0, struct nv50_tic_entry *tic,
struct nv04_resource *res)
@@ -244,7 +430,7 @@ nvc0_update_tic(struct nvc0_context *nvc0, struct nv50_tic_entry *tic,
tic->tic[2] |= address >> 32;
}
-static bool
+bool
nvc0_validate_tic(struct nvc0_context *nvc0, int s)
{
uint32_t commands[32];
@@ -285,7 +471,10 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s)
need_flush = true;
} else
if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
- BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
+ if (unlikely(s == 5))
+ BEGIN_NVC0(push, NVC0_CP(TEX_CACHE_CTL), 1);
+ else
+ BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
PUSH_DATA (push, (tic->id << 4) | 1);
NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_cache_flush_count, 1);
}
@@ -298,7 +487,10 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s)
continue;
commands[n++] = (tic->id << 9) | (i << 1) | 1;
- BCTX_REFN(nvc0->bufctx_3d, TEX(s, i), res, RD);
+ if (unlikely(s == 5))
+ BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD);
+ else
+ BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD);
}
for (; i < nvc0->state.num_textures[s]; ++i)
commands[n++] = (i << 1) | 0;
@@ -306,7 +498,10 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s)
nvc0->state.num_textures[s] = nvc0->num_textures[s];
if (n) {
- BEGIN_NIC0(push, NVC0_3D(BIND_TIC(s)), n);
+ if (unlikely(s == 5))
+ BEGIN_NIC0(push, NVC0_CP(BIND_TIC), n);
+ else
+ BEGIN_NIC0(push, NVC0_3D(BIND_TIC(s)), n);
PUSH_DATAp(push, commands, n);
}
nvc0->textures_dirty[s] = 0;
@@ -362,7 +557,7 @@ nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)
nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID;
nvc0->tex_handles[s][i] |= tic->id;
if (dirty)
- BCTX_REFN(nvc0->bufctx_3d, TEX(s, i), res, RD);
+ BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD);
}
for (; i < nvc0->state.num_textures[s]; ++i) {
nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
@@ -392,7 +587,7 @@ void nvc0_validate_textures(struct nvc0_context *nvc0)
}
}
-static bool
+bool
nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
{
uint32_t commands[16];
@@ -410,6 +605,7 @@ nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
commands[n++] = (i << 4) | 0;
continue;
}
+ nvc0->seamless_cube_map = tsc->seamless_cube_map;
if (tsc->id < 0) {
tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);
@@ -428,7 +624,10 @@ nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
nvc0->state.num_samplers[s] = nvc0->num_samplers[s];
if (n) {
- BEGIN_NIC0(push, NVC0_3D(BIND_TSC(s)), n);
+ if (unlikely(s == 5))
+ BEGIN_NIC0(push, NVC0_CP(BIND_TSC), n);
+ else
+ BEGIN_NIC0(push, NVC0_3D(BIND_TSC(s)), n);
PUSH_DATAp(push, commands, n);
}
nvc0->samplers_dirty[s] = 0;
@@ -513,7 +712,7 @@ nve4_set_tex_handles(struct nvc0_context *nvc0)
if (nvc0->screen->base.class_3d < NVE4_3D_CLASS)
return;
- address = nvc0->screen->uniform_bo->offset + (5 << 16);
+ address = nvc0->screen->uniform_bo->offset + (6 << 16);
for (s = 0; s < 5; ++s, address += (1 << 10)) {
uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s];
@@ -686,45 +885,45 @@ nvc0_validate_surfaces(struct nvc0_context *nvc0)
static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT] =
{
- [PIPE_FORMAT_R32G32B32A32_FLOAT] = NVE4_IMAGE_FORMAT_RGBA32_FLOAT,
- [PIPE_FORMAT_R32G32B32A32_SINT] = NVE4_IMAGE_FORMAT_RGBA32_SINT,
- [PIPE_FORMAT_R32G32B32A32_UINT] = NVE4_IMAGE_FORMAT_RGBA32_UINT,
- [PIPE_FORMAT_R16G16B16A16_FLOAT] = NVE4_IMAGE_FORMAT_RGBA16_FLOAT,
- [PIPE_FORMAT_R16G16B16A16_UNORM] = NVE4_IMAGE_FORMAT_RGBA16_UNORM,
- [PIPE_FORMAT_R16G16B16A16_SNORM] = NVE4_IMAGE_FORMAT_RGBA16_SNORM,
- [PIPE_FORMAT_R16G16B16A16_SINT] = NVE4_IMAGE_FORMAT_RGBA16_SINT,
- [PIPE_FORMAT_R16G16B16A16_UINT] = NVE4_IMAGE_FORMAT_RGBA16_UINT,
- [PIPE_FORMAT_R8G8B8A8_UNORM] = NVE4_IMAGE_FORMAT_RGBA8_UNORM,
- [PIPE_FORMAT_R8G8B8A8_SNORM] = NVE4_IMAGE_FORMAT_RGBA8_SNORM,
- [PIPE_FORMAT_R8G8B8A8_SINT] = NVE4_IMAGE_FORMAT_RGBA8_SINT,
- [PIPE_FORMAT_R8G8B8A8_UINT] = NVE4_IMAGE_FORMAT_RGBA8_UINT,
- [PIPE_FORMAT_R11G11B10_FLOAT] = NVE4_IMAGE_FORMAT_R11G11B10_FLOAT,
- [PIPE_FORMAT_R10G10B10A2_UNORM] = NVE4_IMAGE_FORMAT_RGB10_A2_UNORM,
-/* [PIPE_FORMAT_R10G10B10A2_UINT] = NVE4_IMAGE_FORMAT_RGB10_A2_UINT, */
- [PIPE_FORMAT_R32G32_FLOAT] = NVE4_IMAGE_FORMAT_RG32_FLOAT,
- [PIPE_FORMAT_R32G32_SINT] = NVE4_IMAGE_FORMAT_RG32_SINT,
- [PIPE_FORMAT_R32G32_UINT] = NVE4_IMAGE_FORMAT_RG32_UINT,
- [PIPE_FORMAT_R16G16_FLOAT] = NVE4_IMAGE_FORMAT_RG16_FLOAT,
- [PIPE_FORMAT_R16G16_UNORM] = NVE4_IMAGE_FORMAT_RG16_UNORM,
- [PIPE_FORMAT_R16G16_SNORM] = NVE4_IMAGE_FORMAT_RG16_SNORM,
- [PIPE_FORMAT_R16G16_SINT] = NVE4_IMAGE_FORMAT_RG16_SINT,
- [PIPE_FORMAT_R16G16_UINT] = NVE4_IMAGE_FORMAT_RG16_UINT,
- [PIPE_FORMAT_R8G8_UNORM] = NVE4_IMAGE_FORMAT_RG8_UNORM,
- [PIPE_FORMAT_R8G8_SNORM] = NVE4_IMAGE_FORMAT_RG8_SNORM,
- [PIPE_FORMAT_R8G8_SINT] = NVE4_IMAGE_FORMAT_RG8_SINT,
- [PIPE_FORMAT_R8G8_UINT] = NVE4_IMAGE_FORMAT_RG8_UINT,
- [PIPE_FORMAT_R32_FLOAT] = NVE4_IMAGE_FORMAT_R32_FLOAT,
- [PIPE_FORMAT_R32_SINT] = NVE4_IMAGE_FORMAT_R32_SINT,
- [PIPE_FORMAT_R32_UINT] = NVE4_IMAGE_FORMAT_R32_UINT,
- [PIPE_FORMAT_R16_FLOAT] = NVE4_IMAGE_FORMAT_R16_FLOAT,
- [PIPE_FORMAT_R16_UNORM] = NVE4_IMAGE_FORMAT_R16_UNORM,
- [PIPE_FORMAT_R16_SNORM] = NVE4_IMAGE_FORMAT_R16_SNORM,
- [PIPE_FORMAT_R16_SINT] = NVE4_IMAGE_FORMAT_R16_SINT,
- [PIPE_FORMAT_R16_UINT] = NVE4_IMAGE_FORMAT_R16_UINT,
- [PIPE_FORMAT_R8_UNORM] = NVE4_IMAGE_FORMAT_R8_UNORM,
- [PIPE_FORMAT_R8_SNORM] = NVE4_IMAGE_FORMAT_R8_SNORM,
- [PIPE_FORMAT_R8_SINT] = NVE4_IMAGE_FORMAT_R8_SINT,
- [PIPE_FORMAT_R8_UINT] = NVE4_IMAGE_FORMAT_R8_UINT,
+ [PIPE_FORMAT_R32G32B32A32_FLOAT] = GK104_IMAGE_FORMAT_RGBA32_FLOAT,
+ [PIPE_FORMAT_R32G32B32A32_SINT] = GK104_IMAGE_FORMAT_RGBA32_SINT,
+ [PIPE_FORMAT_R32G32B32A32_UINT] = GK104_IMAGE_FORMAT_RGBA32_UINT,
+ [PIPE_FORMAT_R16G16B16A16_FLOAT] = GK104_IMAGE_FORMAT_RGBA16_FLOAT,
+ [PIPE_FORMAT_R16G16B16A16_UNORM] = GK104_IMAGE_FORMAT_RGBA16_UNORM,
+ [PIPE_FORMAT_R16G16B16A16_SNORM] = GK104_IMAGE_FORMAT_RGBA16_SNORM,
+ [PIPE_FORMAT_R16G16B16A16_SINT] = GK104_IMAGE_FORMAT_RGBA16_SINT,
+ [PIPE_FORMAT_R16G16B16A16_UINT] = GK104_IMAGE_FORMAT_RGBA16_UINT,
+ [PIPE_FORMAT_R8G8B8A8_UNORM] = GK104_IMAGE_FORMAT_RGBA8_UNORM,
+ [PIPE_FORMAT_R8G8B8A8_SNORM] = GK104_IMAGE_FORMAT_RGBA8_SNORM,
+ [PIPE_FORMAT_R8G8B8A8_SINT] = GK104_IMAGE_FORMAT_RGBA8_SINT,
+ [PIPE_FORMAT_R8G8B8A8_UINT] = GK104_IMAGE_FORMAT_RGBA8_UINT,
+ [PIPE_FORMAT_R11G11B10_FLOAT] = GK104_IMAGE_FORMAT_R11G11B10_FLOAT,
+ [PIPE_FORMAT_R10G10B10A2_UNORM] = GK104_IMAGE_FORMAT_RGB10_A2_UNORM,
+/* [PIPE_FORMAT_R10G10B10A2_UINT] = GK104_IMAGE_FORMAT_RGB10_A2_UINT, */
+ [PIPE_FORMAT_R32G32_FLOAT] = GK104_IMAGE_FORMAT_RG32_FLOAT,
+ [PIPE_FORMAT_R32G32_SINT] = GK104_IMAGE_FORMAT_RG32_SINT,
+ [PIPE_FORMAT_R32G32_UINT] = GK104_IMAGE_FORMAT_RG32_UINT,
+ [PIPE_FORMAT_R16G16_FLOAT] = GK104_IMAGE_FORMAT_RG16_FLOAT,
+ [PIPE_FORMAT_R16G16_UNORM] = GK104_IMAGE_FORMAT_RG16_UNORM,
+ [PIPE_FORMAT_R16G16_SNORM] = GK104_IMAGE_FORMAT_RG16_SNORM,
+ [PIPE_FORMAT_R16G16_SINT] = GK104_IMAGE_FORMAT_RG16_SINT,
+ [PIPE_FORMAT_R16G16_UINT] = GK104_IMAGE_FORMAT_RG16_UINT,
+ [PIPE_FORMAT_R8G8_UNORM] = GK104_IMAGE_FORMAT_RG8_UNORM,
+ [PIPE_FORMAT_R8G8_SNORM] = GK104_IMAGE_FORMAT_RG8_SNORM,
+ [PIPE_FORMAT_R8G8_SINT] = GK104_IMAGE_FORMAT_RG8_SINT,
+ [PIPE_FORMAT_R8G8_UINT] = GK104_IMAGE_FORMAT_RG8_UINT,
+ [PIPE_FORMAT_R32_FLOAT] = GK104_IMAGE_FORMAT_R32_FLOAT,
+ [PIPE_FORMAT_R32_SINT] = GK104_IMAGE_FORMAT_R32_SINT,
+ [PIPE_FORMAT_R32_UINT] = GK104_IMAGE_FORMAT_R32_UINT,
+ [PIPE_FORMAT_R16_FLOAT] = GK104_IMAGE_FORMAT_R16_FLOAT,
+ [PIPE_FORMAT_R16_UNORM] = GK104_IMAGE_FORMAT_R16_UNORM,
+ [PIPE_FORMAT_R16_SNORM] = GK104_IMAGE_FORMAT_R16_SNORM,
+ [PIPE_FORMAT_R16_SINT] = GK104_IMAGE_FORMAT_R16_SINT,
+ [PIPE_FORMAT_R16_UINT] = GK104_IMAGE_FORMAT_R16_UINT,
+ [PIPE_FORMAT_R8_UNORM] = GK104_IMAGE_FORMAT_R8_UNORM,
+ [PIPE_FORMAT_R8_SNORM] = GK104_IMAGE_FORMAT_R8_SNORM,
+ [PIPE_FORMAT_R8_SINT] = GK104_IMAGE_FORMAT_R8_SINT,
+ [PIPE_FORMAT_R8_UINT] = GK104_IMAGE_FORMAT_R8_UINT,
};
/* Auxiliary format description values for surface instructions.
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
index 279c7e93cc8..24d23d29bbf 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
@@ -3,8 +3,6 @@
#include "nvc0/nvc0_context.h"
-#include "nv50/nv50_defs.xml.h"
-
struct nvc0_transfer {
struct pipe_transfer base;
struct nv50_m2mf_rect rect[2];
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
index 032b3c125cf..647aa10ec35 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -80,7 +80,7 @@ nvc0_vertex_state_create(struct pipe_context *pipe,
enum pipe_format fmt = ve->src_format;
so->element[i].pipe = elements[i];
- so->element[i].state = nvc0_format_table[fmt].vtx;
+ so->element[i].state = nvc0_vertex_format[fmt].vtx;
if (!so->element[i].state) {
switch (util_format_get_nr_components(fmt)) {
@@ -93,7 +93,7 @@ nvc0_vertex_state_create(struct pipe_context *pipe,
FREE(so);
return NULL;
}
- so->element[i].state = nvc0_format_table[fmt].vtx;
+ so->element[i].state = nvc0_vertex_format[fmt].vtx;
so->need_conversion = true;
pipe_debug_message(&nouveau_context(pipe)->debug, FALLBACK,
"Converting vertex element %d, no hw format %s",
@@ -222,7 +222,7 @@ static inline void
nvc0_release_user_vbufs(struct nvc0_context *nvc0)
{
if (nvc0->vbo_user) {
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX_TMP);
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_VTX_TMP);
nouveau_scratch_done(&nvc0->base);
}
}
@@ -257,7 +257,7 @@ nvc0_update_user_vbufs(struct nvc0_context *nvc0)
address[b] = nouveau_scratch_data(&nvc0->base, vb->user_buffer,
base, size, &bo);
if (bo)
- BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, bo_flags, bo);
+ BCTX_REFN_bo(nvc0->bufctx_3d, 3D_VTX_TMP, bo_flags, bo);
NOUVEAU_DRV_STAT(&nvc0->screen->base, user_buffer_upload_bytes, size);
}
@@ -292,7 +292,7 @@ nvc0_update_user_vbufs_shared(struct nvc0_context *nvc0)
address = nouveau_scratch_data(&nvc0->base, nvc0->vtxbuf[b].user_buffer,
base, size, &bo);
if (bo)
- BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, bo_flags, bo);
+ BCTX_REFN_bo(nvc0->bufctx_3d, 3D_VTX_TMP, bo_flags, bo);
BEGIN_1IC0(push, NVC0_3D(MACRO_VERTEX_ARRAY_SELECT), 5);
PUSH_DATA (push, b);
@@ -368,7 +368,7 @@ nvc0_validate_vertex_buffers(struct nvc0_context *nvc0)
if (!(refd & (1 << b))) {
refd |= 1 << b;
- BCTX_REFN(nvc0->bufctx_3d, VTX, res, RD);
+ BCTX_REFN(nvc0->bufctx_3d, 3D_VTX, res, RD);
}
}
if (nvc0->vbo_user)
@@ -412,7 +412,7 @@ nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0)
PUSH_DATAh(push, buf->address + limit);
PUSH_DATA (push, buf->address + limit);
- BCTX_REFN(nvc0->bufctx_3d, VTX, buf, RD);
+ BCTX_REFN(nvc0->bufctx_3d, 3D_VTX, buf, RD);
}
/* If there are more elements than buffers, we might not have unset
* fetching on the later elements.
@@ -435,7 +435,7 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0)
uint8_t vbo_mode;
bool update_vertex;
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX);
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_VTX);
assert(vertex);
if (unlikely(vertex->need_conversion) ||
@@ -446,7 +446,7 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0)
}
const_vbos = vbo_mode ? 0 : nvc0->constant_vbos;
- update_vertex = (nvc0->dirty & NVC0_NEW_VERTEX) ||
+ update_vertex = (nvc0->dirty_3d & NVC0_NEW_3D_VERTEX) ||
(const_vbos != nvc0->state.constant_vbos) ||
(vbo_mode != nvc0->state.vbo_mode);
@@ -537,7 +537,7 @@ nvc0_idxbuf_validate(struct nvc0_context *nvc0)
PUSH_DATA (push, buf->address + buf->base.width0 - 1);
PUSH_DATA (push, nvc0->idxbuf.index_size >> 1);
- BCTX_REFN(nvc0->bufctx_3d, IDX, buf, RD);
+ BCTX_REFN(nvc0->bufctx_3d, 3D_IDX, buf, RD);
}
#define NVC0_PRIM_GL_CASE(n) \
@@ -833,8 +833,10 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
/* Queue things up to let the macros write params to the driver constbuf */
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 512);
- PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (0 << 9));
- PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (0 << 9));
+ PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (0 << 9));
+ PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (0 << 9));
+ BEGIN_NVC0(push, NVC0_3D(CB_POS), 1);
+ PUSH_DATA (push, 256 + 128);
if (info->indexed) {
assert(nvc0->idxbuf.buffer);
@@ -947,12 +949,12 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
info->indexed && (nvc0->vb_elt_limit >= (info->count * 2));
/* Check whether we want to switch vertex-submission mode. */
- if (nvc0->vbo_user && !(nvc0->dirty & (NVC0_NEW_ARRAYS | NVC0_NEW_VERTEX))) {
+ if (nvc0->vbo_user && !(nvc0->dirty_3d & (NVC0_NEW_3D_ARRAYS | NVC0_NEW_3D_VERTEX))) {
if (nvc0->vbo_push_hint != !!nvc0->state.vbo_mode)
if (nvc0->state.vbo_mode != 3)
- nvc0->dirty |= NVC0_NEW_ARRAYS;
+ nvc0->dirty_3d |= NVC0_NEW_3D_ARRAYS;
- if (!(nvc0->dirty & NVC0_NEW_ARRAYS) && nvc0->state.vbo_mode == 0) {
+ if (!(nvc0->dirty_3d & NVC0_NEW_3D_ARRAYS) && nvc0->state.vbo_mode == 0) {
if (nvc0->vertex->shared_slots)
nvc0_update_user_vbufs_shared(nvc0);
else
@@ -973,8 +975,8 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
PUSH_SPACE(push, 9);
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 512);
- PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (0 << 9));
- PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (0 << 9));
+ PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (0 << 9));
+ PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (0 << 9));
if (!info->indirect) {
BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 3);
PUSH_DATA (push, 256 + 128);
@@ -984,6 +986,14 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
}
}
+ if (nvc0->screen->base.class_3d < NVE4_3D_CLASS &&
+ nvc0->seamless_cube_map != nvc0->state.seamless_cube_map) {
+ nvc0->state.seamless_cube_map = nvc0->seamless_cube_map;
+ PUSH_SPACE(push, 1);
+ IMMED_NVC0(push, NVC0_3D(TEX_MISC),
+ nvc0->seamless_cube_map ? NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP : 0);
+ }
+
push->kick_notify = nvc0_draw_vbo_kick_notify;
for (s = 0; s < 5 && !nvc0->cb_dirty; ++s) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
index 9c19ba20a7e..20b6742d8d7 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
@@ -225,7 +225,7 @@ nvc0_push_setup_vertex_array(struct nvc0_context *nvc0, const unsigned count)
PUSH_DATAh(push, va + size - 1);
PUSH_DATA (push, va + size - 1);
- BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD,
+ BCTX_REFN_bo(nvc0->bufctx_3d, 3D_VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD,
bo);
nouveau_pushbuf_validate(push);
@@ -554,7 +554,7 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
++ctx.instance_id;
}
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX_TMP);
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_VTX_TMP);
nouveau_scratch_done(&nvc0->base);
} while (inst_count);
@@ -629,7 +629,7 @@ nvc0_push_upload_vertex_ids(struct push_context *ctx,
data = (uint32_t *)nouveau_scratch_get(&nvc0->base,
info->count * index_size, &va, &bo);
- BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD,
+ BCTX_REFN_bo(nvc0->bufctx_3d, 3D_VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD,
bo);
nouveau_pushbuf_validate(push);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h b/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h
index 79abe78b77a..4d07546c310 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h
@@ -50,9 +50,9 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags)
#define NVC0_3D(n) SUBC_3D(NVC0_3D_##n)
#define NVE4_3D(n) SUBC_3D(NVE4_3D_##n)
-#define SUBC_COMPUTE(m) 1, (m)
-#define NVC0_COMPUTE(n) SUBC_COMPUTE(NVC0_COMPUTE_##n)
-#define NVE4_COMPUTE(n) SUBC_COMPUTE(NVE4_COMPUTE_##n)
+#define SUBC_CP(m) 1, (m)
+#define NVC0_CP(n) SUBC_CP(NVC0_COMPUTE_##n)
+#define NVE4_CP(n) SUBC_CP(NVE4_COMPUTE_##n)
#define SUBC_M2MF(m) 2, (m)
#define SUBC_P2MF(m) 2, (m)
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
index d3e5676873e..652bc6d83d6 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -39,7 +39,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
{
struct nouveau_device *dev = screen->base.device;
struct nouveau_object *chan = screen->base.channel;
- unsigned i;
+ int i;
int ret;
uint32_t obj_class;
@@ -51,6 +51,9 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
case 0xe0:
obj_class = NVE4_COMPUTE_CLASS; /* GK104 */
break;
+ case 0x110:
+ obj_class = GM107_COMPUTE_CLASS;
+ break;
default:
NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
return -1;
@@ -68,21 +71,21 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
if (ret)
return ret;
- BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
+ BEGIN_NVC0(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1);
PUSH_DATA (push, screen->compute->oclass);
- BEGIN_NVC0(push, NVE4_COMPUTE(TEMP_ADDRESS_HIGH), 2);
+ BEGIN_NVC0(push, NVE4_CP(TEMP_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->tls->offset);
PUSH_DATA (push, screen->tls->offset);
/* No idea why there are 2. Divide size by 2 to be safe.
* Actually this might be per-MP TEMP size and looks like I'm only using
* 2 MPs instead of all 8.
*/
- BEGIN_NVC0(push, NVE4_COMPUTE(MP_TEMP_SIZE_HIGH(0)), 3);
+ BEGIN_NVC0(push, NVE4_CP(MP_TEMP_SIZE_HIGH(0)), 3);
PUSH_DATAh(push, screen->tls->size / screen->mp_count);
PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);
PUSH_DATA (push, 0xff);
- BEGIN_NVC0(push, NVE4_COMPUTE(MP_TEMP_SIZE_HIGH(1)), 3);
+ BEGIN_NVC0(push, NVE4_CP(MP_TEMP_SIZE_HIGH(1)), 3);
PUSH_DATAh(push, screen->tls->size / screen->mp_count);
PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);
PUSH_DATA (push, 0xff);
@@ -92,52 +95,53 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
* FATAL: Buffers with addresses inside [0x1000000, 0x3000000] will NOT be
* accessible. We cannot prevent that at the moment, so expect failure.
*/
- BEGIN_NVC0(push, NVE4_COMPUTE(LOCAL_BASE), 1);
+ BEGIN_NVC0(push, NVE4_CP(LOCAL_BASE), 1);
PUSH_DATA (push, 1 << 24);
- BEGIN_NVC0(push, NVE4_COMPUTE(SHARED_BASE), 1);
+ BEGIN_NVC0(push, NVE4_CP(SHARED_BASE), 1);
PUSH_DATA (push, 2 << 24);
- BEGIN_NVC0(push, NVE4_COMPUTE(CODE_ADDRESS_HIGH), 2);
+ BEGIN_NVC0(push, NVE4_CP(CODE_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->text->offset);
PUSH_DATA (push, screen->text->offset);
- BEGIN_NVC0(push, SUBC_COMPUTE(0x0310), 1);
+ BEGIN_NVC0(push, SUBC_CP(0x0310), 1);
PUSH_DATA (push, (obj_class >= NVF0_COMPUTE_CLASS) ? 0x400 : 0x300);
/* NOTE: these do not affect the state used by the 3D object */
- BEGIN_NVC0(push, NVE4_COMPUTE(TIC_ADDRESS_HIGH), 3);
+ BEGIN_NVC0(push, NVE4_CP(TIC_ADDRESS_HIGH), 3);
PUSH_DATAh(push, screen->txc->offset);
PUSH_DATA (push, screen->txc->offset);
PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1);
- BEGIN_NVC0(push, NVE4_COMPUTE(TSC_ADDRESS_HIGH), 3);
+ BEGIN_NVC0(push, NVE4_CP(TSC_ADDRESS_HIGH), 3);
PUSH_DATAh(push, screen->txc->offset + 65536);
PUSH_DATA (push, screen->txc->offset + 65536);
PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1);
if (obj_class >= NVF0_COMPUTE_CLASS) {
- BEGIN_NVC0(push, SUBC_COMPUTE(0x0248), 1);
- PUSH_DATA (push, 0x100);
- BEGIN_NIC0(push, SUBC_COMPUTE(0x0248), 63);
- for (i = 63; i >= 1; --i)
+ /* The blob calls GK110_COMPUTE.FIRMWARE[0x6], along with the args (0x1)
+ * passed with GK110_COMPUTE.GRAPH.SCRATCH[0x2]. This is currently
+ * disabled because our firmware doesn't support these commands and the
+ * GPU hangs if they are used. */
+ BEGIN_NIC0(push, SUBC_CP(0x0248), 64);
+ for (i = 63; i >= 0; i--)
PUSH_DATA(push, 0x38000 | i);
- IMMED_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 0);
- IMMED_NVC0(push, SUBC_COMPUTE(0x518), 0);
+ IMMED_NVC0(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 0);
}
- BEGIN_NVC0(push, NVE4_COMPUTE(TEX_CB_INDEX), 1);
+ BEGIN_NVC0(push, NVE4_CP(TEX_CB_INDEX), 1);
PUSH_DATA (push, 0); /* does not interefere with 3D */
- if (obj_class >= NVF0_COMPUTE_CLASS)
- IMMED_NVC0(push, SUBC_COMPUTE(0x02c4), 1);
+ if (obj_class == NVF0_COMPUTE_CLASS)
+ IMMED_NVC0(push, SUBC_CP(0x02c4), 1);
/* MS sample coordinate offsets: these do not work with _ALT modes ! */
- BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);
PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);
- BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
PUSH_DATA (push, 64);
PUSH_DATA (push, 1);
- BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 17);
+ BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 17);
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
PUSH_DATA (push, 0); /* 0 */
PUSH_DATA (push, 0);
@@ -157,13 +161,13 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
PUSH_DATA (push, 1);
#ifdef DEBUG
- BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);
PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);
- BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
PUSH_DATA (push, 28);
PUSH_DATA (push, 1);
- BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 8);
+ BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 8);
PUSH_DATA (push, 1);
PUSH_DATA (push, screen->parm->offset + NVE4_CP_PARAM_TRAP_INFO);
PUSH_DATAh(push, screen->parm->offset + NVE4_CP_PARAM_TRAP_INFO);
@@ -174,7 +178,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
PUSH_DATA (push, 0); /* warp cfstack size */
#endif
- BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
+ BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
return 0;
@@ -201,13 +205,13 @@ nve4_compute_validate_surfaces(struct nvc0_context *nvc0)
* NVE4's surface load/store instructions receive all the information
* directly instead of via binding points, so we have to supply them.
*/
- BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_SUF(i));
PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_SUF(i));
- BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
PUSH_DATA (push, 64);
PUSH_DATA (push, 1);
- BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 17);
+ BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 17);
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
nve4_set_surface_info(push, nvc0->surfaces[t][i], screen);
@@ -223,7 +227,7 @@ nve4_compute_validate_surfaces(struct nvc0_context *nvc0)
}
}
if (nvc0->surfaces_dirty[t]) {
- BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
+ BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
}
@@ -252,7 +256,7 @@ nve4_compute_validate_samplers(struct nvc0_context *nvc0)
{
bool need_flush = nve4_validate_tsc(nvc0, 5);
if (need_flush) {
- BEGIN_NVC0(nvc0->base.pushbuf, NVE4_COMPUTE(TSC_FLUSH), 1);
+ BEGIN_NVC0(nvc0->base.pushbuf, NVE4_CP(TSC_FLUSH), 1);
PUSH_DATA (nvc0->base.pushbuf, 0);
}
}
@@ -281,17 +285,17 @@ nve4_compute_set_tex_handles(struct nvc0_context *nvc0)
address = nvc0->screen->parm->offset + NVE4_CP_INPUT_TEX(i);
- BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
PUSH_DATAh(push, address);
PUSH_DATA (push, address);
- BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
PUSH_DATA (push, n * 4);
PUSH_DATA (push, 0x1);
- BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + n);
+ BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + n);
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
PUSH_DATAp(push, &nvc0->tex_handles[s][i], n);
- BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
+ BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
nvc0->textures_dirty[s] = 0;
@@ -338,29 +342,29 @@ nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input,
struct nvc0_program *cp = nvc0->compprog;
if (cp->parm_size) {
- BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->parm->offset);
PUSH_DATA (push, screen->parm->offset);
- BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
PUSH_DATA (push, cp->parm_size);
PUSH_DATA (push, 0x1);
- BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + (cp->parm_size / 4));
+ BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (cp->parm_size / 4));
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
PUSH_DATAp(push, input, cp->parm_size / 4);
}
- BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0));
PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0));
- BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
PUSH_DATA (push, 7 * 4);
PUSH_DATA (push, 0x1);
- BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + 7);
+ BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 7);
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
PUSH_DATAp(push, block_layout, 3);
PUSH_DATAp(push, grid_layout, 3);
PUSH_DATA (push, 0);
- BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
+ BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
}
@@ -429,10 +433,7 @@ nve4_compute_alloc_launch_desc(struct nouveau_context *nv,
}
void
-nve4_launch_grid(struct pipe_context *pipe,
- const uint *block_layout, const uint *grid_layout,
- uint32_t label,
- const void *input)
+nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
@@ -453,33 +454,34 @@ nve4_launch_grid(struct pipe_context *pipe,
if (ret)
goto out;
- nve4_compute_setup_launch_desc(nvc0, desc, label, block_layout, grid_layout);
+ nve4_compute_setup_launch_desc(nvc0, desc, info->pc,
+ info->block, info->grid);
#ifdef DEBUG
if (debug_get_num_option("NV50_PROG_DEBUG", 0))
nve4_compute_dump_launch_desc(desc);
#endif
- nve4_compute_upload_input(nvc0, input, block_layout, grid_layout);
+ nve4_compute_upload_input(nvc0, info->input, info->block, info->grid);
/* upload descriptor and flush */
#if 0
- BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
PUSH_DATAh(push, desc_gpuaddr);
PUSH_DATA (push, desc_gpuaddr);
- BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
PUSH_DATA (push, 256);
PUSH_DATA (push, 1);
- BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + (256 / 4));
+ BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (256 / 4));
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x08 << 1));
PUSH_DATAp(push, (const uint32_t *)desc, 256 / 4);
- BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
+ BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB | NVE4_COMPUTE_FLUSH_CODE);
#endif
- BEGIN_NVC0(push, NVE4_COMPUTE(LAUNCH_DESC_ADDRESS), 1);
+ BEGIN_NVC0(push, NVE4_CP(LAUNCH_DESC_ADDRESS), 1);
PUSH_DATA (push, desc_gpuaddr >> 8);
- BEGIN_NVC0(push, NVE4_COMPUTE(LAUNCH), 1);
+ BEGIN_NVC0(push, NVE4_CP(LAUNCH), 1);
PUSH_DATA (push, 0x3);
- BEGIN_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1);
+ BEGIN_NVC0(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1);
PUSH_DATA (push, 0);
out:
@@ -517,13 +519,13 @@ nve4_compute_validate_textures(struct nvc0_context *nvc0)
tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
PUSH_SPACE(push, 16);
- BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
PUSH_DATAh(push, txc->offset + (tic->id * 32));
PUSH_DATA (push, txc->offset + (tic->id * 32));
- BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
PUSH_DATA (push, 32);
PUSH_DATA (push, 1);
- BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 9);
+ BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 9);
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
PUSH_DATAp(push, &tic->tic[0], 8);
@@ -546,11 +548,11 @@ nve4_compute_validate_textures(struct nvc0_context *nvc0)
nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
if (n[0]) {
- BEGIN_NIC0(push, NVE4_COMPUTE(TIC_FLUSH), n[0]);
+ BEGIN_NIC0(push, NVE4_CP(TIC_FLUSH), n[0]);
PUSH_DATAp(push, commands[0], n[0]);
}
if (n[1]) {
- BEGIN_NIC0(push, NVE4_COMPUTE(TEX_CACHE_CTL), n[1]);
+ BEGIN_NIC0(push, NVE4_CP(TEX_CACHE_CTL), n[1]);
PUSH_DATAp(push, commands[1], n[1]);
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
index 7364a68a579..84f8593b9b6 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
@@ -2,7 +2,6 @@
#ifndef NVE4_COMPUTE_H
#define NVE4_COMPUTE_H
-#include "nv50/nv50_defs.xml.h"
#include "nvc0/nve4_compute.xml.h"
/* Input space is implemented as c0[], to which we bind the screen->parm bo.
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index a2b7f87855d..15a94d90721 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -319,11 +319,14 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ return 0;
}
break;
case PIPE_SHADER_VERTEX:
@@ -378,11 +381,14 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ return 0;
}
break;
}
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index 56c7fb93f73..997e5f0e383 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -553,25 +553,24 @@ void evergreen_emit_cs_shader(
}
static void evergreen_launch_grid(
- struct pipe_context *ctx_,
- const uint *block_layout, const uint *grid_layout,
- uint32_t pc, const void *input)
+ struct pipe_context *ctx_, const struct pipe_grid_info *info)
{
struct r600_context *ctx = (struct r600_context *)ctx_;
#ifdef HAVE_OPENCL
struct r600_pipe_compute *shader = ctx->cs_shader_state.shader;
boolean use_kill;
- ctx->cs_shader_state.pc = pc;
+ ctx->cs_shader_state.pc = info->pc;
/* Get the config information for this kernel. */
- r600_shader_binary_read_config(&shader->binary, &shader->bc, pc, &use_kill);
+ r600_shader_binary_read_config(&shader->binary, &shader->bc,
+ info->pc, &use_kill);
#endif
- COMPUTE_DBG(ctx->screen, "*** evergreen_launch_grid: pc = %u\n", pc);
+ COMPUTE_DBG(ctx->screen, "*** evergreen_launch_grid: pc = %u\n", info->pc);
- evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input);
- compute_emit_cs(ctx, block_layout, grid_layout);
+ evergreen_compute_upload_input(ctx_, info->block, info->grid, info->input);
+ compute_emit_cs(ctx, info->block, info->grid);
}
static void evergreen_set_compute_resources(struct pipe_context * ctx_,
diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c
index 0fe7c74418d..7eab29c6eb4 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -784,12 +784,12 @@ LLVMModuleRef r600_tgsi_llvm(
{
struct tgsi_shader_info shader_info;
struct lp_build_tgsi_context * bld_base = &ctx->soa.bld_base;
- radeon_llvm_context_init(ctx);
+ radeon_llvm_context_init(ctx, "r600--");
LLVMTypeRef Arguments[32];
unsigned ArgumentsCount = 0;
for (unsigned i = 0; i < ctx->inputs_count; i++)
Arguments[ArgumentsCount++] = LLVMVectorType(bld_base->base.elem_type, 4);
- radeon_llvm_create_func(ctx, Arguments, ArgumentsCount);
+ radeon_llvm_create_func(ctx, NULL, 0, Arguments, ArgumentsCount);
for (unsigned i = 0; i < ctx->inputs_count; i++) {
LLVMValueRef P = LLVMGetParam(ctx->main_fn, i);
LLVMAddAttribute(P, LLVMInRegAttribute);
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index c8580d807d7..7fb4108a188 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -532,6 +532,8 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
} else {
return PIPE_SHADER_IR_TGSI;
}
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ return 0;
case PIPE_SHADER_CAP_DOUBLES:
if (rscreen->b.family == CHIP_CYPRESS ||
rscreen->b.family == CHIP_CAYMAN || rscreen->b.family == CHIP_ARUBA)
@@ -541,6 +543,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
/* due to a bug in the shader compiler, some loops hang
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 324d2719f44..ea028272ccd 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -411,6 +411,7 @@ static const struct debug_named_value common_debug_options[] = {
{ "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." },
{ "norbplus", DBG_NO_RB_PLUS, "Disable RB+ on Stoney." },
{ "sisched", DBG_SI_SCHED, "Enable LLVM SI Machine Instruction Scheduler." },
+ { "mono", DBG_MONOLITHIC_SHADERS, "Use old-style monolithic shaders compiled on demand" },
DEBUG_NAMED_VALUE_END /* must be last */
};
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index e92df876c22..7df617737a7 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -89,6 +89,7 @@
#define DBG_NO_DCC_CLEAR (1llu << 44)
#define DBG_NO_RB_PLUS (1llu << 45)
#define DBG_SI_SCHED (1llu << 46)
+#define DBG_MONOLITHIC_SHADERS (1llu << 47)
#define R600_MAP_BUFFER_ALIGNMENT 64
@@ -96,7 +97,7 @@ struct r600_common_context;
struct r600_perfcounters;
struct radeon_shader_reloc {
- char *name;
+ char name[32];
uint64_t offset;
};
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index af206e43860..1df0c300e85 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1293,6 +1293,7 @@ unsigned r600_translate_colorswap(enum pipe_format format)
break;
case 4:
/* check the middle channels, the 1st and 4th channel can be NONE */
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,Z))
return V_0280A0_SWAP_STD; /* XYZW */
else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,Y))
@@ -1301,6 +1302,16 @@ unsigned r600_translate_colorswap(enum pipe_format format)
return V_0280A0_SWAP_ALT; /* ZYXW */
else if (HAS_SWIZZLE(1,X) && HAS_SWIZZLE(2,Y))
return V_0280A0_SWAP_ALT_REV; /* WXYZ */
+#else
+ if (HAS_SWIZZLE(1,W) && HAS_SWIZZLE(2,X))
+ return V_0280A0_SWAP_STD; /* ZWXY */
+ else if (HAS_SWIZZLE(1,X) && HAS_SWIZZLE(2,W))
+ return V_0280A0_SWAP_STD_REV; /* YXWZ */
+ else if (HAS_SWIZZLE(1,W) && HAS_SWIZZLE(2,Z))
+ return V_0280A0_SWAP_ALT; /* XWZY */
+ else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,W))
+ return V_0280A0_SWAP_ALT_REV; /* YZWX */
+#endif
break;
}
return ~0U;
diff --git a/src/gallium/drivers/radeon/radeon_elf_util.c b/src/gallium/drivers/radeon/radeon_elf_util.c
index 2e45d439e7a..8aaa85d02f6 100644
--- a/src/gallium/drivers/radeon/radeon_elf_util.c
+++ b/src/gallium/drivers/radeon/radeon_elf_util.c
@@ -98,7 +98,8 @@ static void parse_relocs(Elf *elf, Elf_Data *relocs, Elf_Data *symbols,
symbol_name = elf_strptr(elf, symbol_sh_link, symbol.st_name);
reloc->offset = rel.r_offset;
- reloc->name = strdup(symbol_name);
+ strncpy(reloc->name, symbol_name, sizeof(reloc->name)-1);
+ reloc->name[sizeof(reloc->name)-1] = 0;
}
}
@@ -194,26 +195,3 @@ const unsigned char *radeon_shader_binary_config_start(
}
return binary->config;
}
-
-void radeon_shader_binary_free_relocs(struct radeon_shader_reloc *relocs,
- unsigned reloc_count)
-{
- unsigned i;
- for (i = 0; i < reloc_count; i++) {
- FREE(relocs[i].name);
- }
- FREE(relocs);
-}
-
-void radeon_shader_binary_free_members(struct radeon_shader_binary *binary,
- unsigned free_relocs)
-{
- FREE(binary->code);
- FREE(binary->config);
- FREE(binary->rodata);
-
- if (free_relocs) {
- radeon_shader_binary_free_relocs(binary->relocs,
- binary->reloc_count);
- }
-}
diff --git a/src/gallium/drivers/radeon/radeon_elf_util.h b/src/gallium/drivers/radeon/radeon_elf_util.h
index ea4ab2f14b2..c2af9e0dfe0 100644
--- a/src/gallium/drivers/radeon/radeon_elf_util.h
+++ b/src/gallium/drivers/radeon/radeon_elf_util.h
@@ -47,18 +47,4 @@ const unsigned char *radeon_shader_binary_config_start(
const struct radeon_shader_binary *binary,
uint64_t symbol_offset);
-/**
- * Free all memory allocated for members of \p binary. This function does
- * not free \p binary.
- *
- * @param free_relocs If false, reolc information will not be freed.
- */
-void radeon_shader_binary_free_members(struct radeon_shader_binary *binary,
- unsigned free_relocs);
-
-/**
- * Free \p relocs and all member data.
- */
-void radeon_shader_binary_free_relocs(struct radeon_shader_reloc *relocs,
- unsigned reloc_count);
#endif /* RADEON_ELF_UTIL_H */
diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h
index e967ad2214e..bdee2f8020a 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -113,6 +113,7 @@ struct radeon_llvm_context {
struct tgsi_declaration_range *arrays;
LLVMValueRef main_fn;
+ LLVMTypeRef return_type;
struct gallivm_state gallivm;
};
@@ -158,10 +159,12 @@ void radeon_llvm_emit_prepare_cube_coords(struct lp_build_tgsi_context * bld_bas
LLVMValueRef *coords_arg,
LLVMValueRef *derivs_arg);
-void radeon_llvm_context_init(struct radeon_llvm_context * ctx);
+void radeon_llvm_context_init(struct radeon_llvm_context * ctx,
+ const char *triple);
void radeon_llvm_create_func(struct radeon_llvm_context * ctx,
- LLVMTypeRef *ParamTypes, unsigned ParamCount);
+ LLVMTypeRef *return_types, unsigned num_return_elems,
+ LLVMTypeRef *ParamTypes, unsigned ParamCount);
void radeon_llvm_dispose(struct radeon_llvm_context * ctx);
diff --git a/src/gallium/drivers/radeon/radeon_llvm_util.c b/src/gallium/drivers/radeon/radeon_llvm_util.c
index 0dfd9ad4867..da19533b862 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_util.c
+++ b/src/gallium/drivers/radeon/radeon_llvm_util.c
@@ -55,8 +55,10 @@ unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx,
static void radeon_llvm_optimize(LLVMModuleRef mod)
{
+#if HAVE_LLVM < 0x0309
const char *data_layout = LLVMGetDataLayout(mod);
LLVMTargetDataRef TD = LLVMCreateTargetData(data_layout);
+#endif
LLVMPassManagerBuilderRef builder = LLVMPassManagerBuilderCreate();
LLVMPassManagerRef pass_manager = LLVMCreatePassManager();
@@ -77,14 +79,18 @@ static void radeon_llvm_optimize(LLVMModuleRef mod)
}
}
+#if HAVE_LLVM < 0x0309
LLVMAddTargetData(TD, pass_manager);
+#endif
LLVMAddAlwaysInlinerPass(pass_manager);
LLVMPassManagerBuilderPopulateModulePassManager(builder, pass_manager);
LLVMRunPassManager(pass_manager, mod);
LLVMPassManagerBuilderDispose(builder);
LLVMDisposePassManager(pass_manager);
+#if HAVE_LLVM < 0x0309
LLVMDisposeTargetData(TD);
+#endif
}
LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index f5e3f6af1a0..c74397fb5c9 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -30,6 +30,7 @@
#include "gallivm/lp_bld_flow.h"
#include "gallivm/lp_bld_init.h"
#include "gallivm/lp_bld_intr.h"
+#include "gallivm/lp_bld_misc.h"
#include "gallivm/lp_bld_swizzle.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
@@ -1520,7 +1521,7 @@ static void emit_up2h(const struct lp_build_tgsi_action *action,
}
}
-void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
+void radeon_llvm_context_init(struct radeon_llvm_context * ctx, const char *triple)
{
struct lp_type type;
@@ -1534,6 +1535,13 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
ctx->gallivm.context = LLVMContextCreate();
ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
ctx->gallivm.context);
+ LLVMSetTarget(ctx->gallivm.module,
+
+#if HAVE_LLVM < 0x0306
+ "r600--");
+#else
+ triple);
+#endif
ctx->gallivm.builder = LLVMCreateBuilderInContext(ctx->gallivm.context);
struct lp_build_tgsi_context * bld_base = &ctx->soa.bld_base;
@@ -1693,14 +1701,22 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
}
void radeon_llvm_create_func(struct radeon_llvm_context * ctx,
+ LLVMTypeRef *return_types, unsigned num_return_elems,
LLVMTypeRef *ParamTypes, unsigned ParamCount)
{
- LLVMTypeRef main_fn_type;
+ LLVMTypeRef main_fn_type, ret_type;
LLVMBasicBlockRef main_fn_body;
+ if (num_return_elems)
+ ret_type = LLVMStructTypeInContext(ctx->gallivm.context,
+ return_types,
+ num_return_elems, true);
+ else
+ ret_type = LLVMVoidTypeInContext(ctx->gallivm.context);
+
/* Setup the function */
- main_fn_type = LLVMFunctionType(LLVMVoidTypeInContext(ctx->gallivm.context),
- ParamTypes, ParamCount, 0);
+ ctx->return_type = ret_type;
+ main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, "main", main_fn_type);
main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context,
ctx->main_fn, "main_body");
@@ -1710,11 +1726,16 @@ void radeon_llvm_create_func(struct radeon_llvm_context * ctx,
void radeon_llvm_finalize_module(struct radeon_llvm_context * ctx)
{
struct gallivm_state * gallivm = ctx->soa.bld_base.base.gallivm;
+ const char *triple = LLVMGetTarget(gallivm->module);
+ LLVMTargetLibraryInfoRef target_library_info;
/* Create the pass manager */
gallivm->passmgr = LLVMCreateFunctionPassManagerForModule(
gallivm->module);
+ target_library_info = gallivm_create_target_library_info(triple);
+ LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);
+
/* This pass should eliminate all the load and store instructions */
LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
@@ -1730,7 +1751,7 @@ void radeon_llvm_finalize_module(struct radeon_llvm_context * ctx)
LLVMDisposeBuilder(gallivm->builder);
LLVMDisposePassManager(gallivm->passmgr);
-
+ gallivm_dispose_target_library_info(target_library_info);
}
void radeon_llvm_dispose(struct radeon_llvm_context * ctx)
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 7370a113d3d..9f5f4c682bc 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -196,9 +196,7 @@ static unsigned compute_num_waves_for_scratch(
}
static void si_launch_grid(
- struct pipe_context *ctx,
- const uint *block_layout, const uint *grid_layout,
- uint32_t pc, const void *input)
+ struct pipe_context *ctx, const struct pipe_grid_info *info)
{
struct si_context *sctx = (struct si_context*)ctx;
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
@@ -232,7 +230,7 @@ static void si_launch_grid(
pm4->compute_pkt = true;
/* Read the config information */
- si_shader_binary_read_config(&shader->binary, &shader->config, pc);
+ si_shader_binary_read_config(&shader->binary, &shader->config, info->pc);
/* Upload the kernel arguments */
@@ -242,15 +240,16 @@ static void si_launch_grid(
kernel_args = sctx->b.ws->buffer_map(input_buffer->buf,
sctx->b.gfx.cs, PIPE_TRANSFER_WRITE);
for (i = 0; i < 3; i++) {
- kernel_args[i] = grid_layout[i];
- kernel_args[i + 3] = grid_layout[i] * block_layout[i];
- kernel_args[i + 6] = block_layout[i];
+ kernel_args[i] = info->grid[i];
+ kernel_args[i + 3] = info->grid[i] * info->block[i];
+ kernel_args[i + 6] = info->block[i];
}
num_waves_for_scratch = compute_num_waves_for_scratch(
- &sctx->screen->b.info, block_layout, grid_layout);
+ &sctx->screen->b.info, info->block, info->grid);
- memcpy(kernel_args + (num_work_size_bytes / 4), input, program->input_size);
+ memcpy(kernel_args + (num_work_size_bytes / 4), info->input,
+ program->input_size);
if (shader->config.scratch_bytes_per_wave > 0) {
@@ -291,11 +290,11 @@ static void si_launch_grid(
si_pm4_set_reg(pm4, R_00B818_COMPUTE_START_Z, 0);
si_pm4_set_reg(pm4, R_00B81C_COMPUTE_NUM_THREAD_X,
- S_00B81C_NUM_THREAD_FULL(block_layout[0]));
+ S_00B81C_NUM_THREAD_FULL(info->block[0]));
si_pm4_set_reg(pm4, R_00B820_COMPUTE_NUM_THREAD_Y,
- S_00B820_NUM_THREAD_FULL(block_layout[1]));
+ S_00B820_NUM_THREAD_FULL(info->block[1]));
si_pm4_set_reg(pm4, R_00B824_COMPUTE_NUM_THREAD_Z,
- S_00B824_NUM_THREAD_FULL(block_layout[2]));
+ S_00B824_NUM_THREAD_FULL(info->block[2]));
/* Global buffers */
for (i = 0; i < MAX_GLOBAL_BUFFERS; i++) {
@@ -323,7 +322,7 @@ static void si_launch_grid(
}
shader_va = shader->bo->gpu_address;
- shader_va += pc;
+ shader_va += info->pc;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, shader->bo,
RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
@@ -375,9 +374,9 @@ static void si_launch_grid(
;
si_pm4_cmd_begin(pm4, PKT3_DISPATCH_DIRECT);
- si_pm4_cmd_add(pm4, grid_layout[0]); /* Thread groups DIM_X */
- si_pm4_cmd_add(pm4, grid_layout[1]); /* Thread groups DIM_Y */
- si_pm4_cmd_add(pm4, grid_layout[2]); /* Thread gropus DIM_Z */
+ si_pm4_cmd_add(pm4, info->grid[0]); /* Thread groups DIM_X */
+ si_pm4_cmd_add(pm4, info->grid[1]); /* Thread groups DIM_Y */
+ si_pm4_cmd_add(pm4, info->grid[2]); /* Thread groups DIM_Z */
si_pm4_cmd_add(pm4, 1); /* DISPATCH_INITIATOR */
si_pm4_cmd_end(pm4, false);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index e9d69d2db38..37fd4a25d59 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -22,6 +22,7 @@
*/
#include "si_pipe.h"
+#include "si_shader.h"
#include "si_public.h"
#include "sid.h"
@@ -448,6 +449,10 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
switch (param) {
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_NATIVE;
+
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ return 0;
+
case PIPE_SHADER_CAP_DOUBLES:
return HAVE_LLVM >= 0x0307;
@@ -511,6 +516,8 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
return 16;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ return 0;
case PIPE_SHADER_CAP_DOUBLES:
return HAVE_LLVM >= 0x0307;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
@@ -522,6 +529,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
}
return 0;
@@ -530,6 +538,14 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
static void si_destroy_screen(struct pipe_screen* pscreen)
{
struct si_screen *sscreen = (struct si_screen *)pscreen;
+ struct si_shader_part *parts[] = {
+ sscreen->vs_prologs,
+ sscreen->vs_epilogs,
+ sscreen->tcs_epilogs,
+ sscreen->ps_prologs,
+ sscreen->ps_epilogs
+ };
+ unsigned i;
if (!sscreen)
return;
@@ -537,6 +553,18 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
if (!sscreen->b.ws->unref(sscreen->b.ws))
return;
+ /* Free shader parts. */
+ for (i = 0; i < ARRAY_SIZE(parts); i++) {
+ while (parts[i]) {
+ struct si_shader_part *part = parts[i];
+
+ parts[i] = part->next;
+ radeon_shader_binary_clean(&part->binary);
+ FREE(part);
+ }
+ }
+ pipe_mutex_destroy(sscreen->shader_parts_mutex);
+ si_destroy_shader_cache(sscreen);
r600_destroy_common_screen(&sscreen->b);
}
@@ -584,7 +612,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
sscreen->b.b.resource_create = r600_resource_create_common;
if (!r600_common_screen_init(&sscreen->b, ws) ||
- !si_init_gs_info(sscreen)) {
+ !si_init_gs_info(sscreen) ||
+ !si_init_shader_cache(sscreen)) {
FREE(sscreen);
return NULL;
}
@@ -594,6 +623,10 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
sscreen->b.has_cp_dma = true;
sscreen->b.has_streamout = true;
+ pipe_mutex_init(sscreen->shader_parts_mutex);
+ sscreen->use_monolithic_shaders =
+ HAVE_LLVM < 0x0308 ||
+ (sscreen->b.debug_flags & DBG_MONOLITHIC_SHADERS) != 0;
if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE))
sscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index b5790d6b564..ef860a58b83 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -80,10 +80,36 @@
#define SI_MAX_BORDER_COLORS 4096
struct si_compute;
+struct hash_table;
struct si_screen {
struct r600_common_screen b;
unsigned gs_table_depth;
+
+ /* Whether shaders are monolithic (1-part) or separate (3-part). */
+ bool use_monolithic_shaders;
+
+ pipe_mutex shader_parts_mutex;
+ struct si_shader_part *vs_prologs;
+ struct si_shader_part *vs_epilogs;
+ struct si_shader_part *tcs_epilogs;
+ struct si_shader_part *ps_prologs;
+ struct si_shader_part *ps_epilogs;
+
+ /* Shader cache in memory.
+ *
+ * Design & limitations:
+ * - The shader cache is per screen (= per process), never saved to
+ * disk, and skips redundant shader compilations from TGSI to bytecode.
+ * - It can only be used with one-variant-per-shader support, in which
+ * case only the main (typically middle) part of shaders is cached.
+ * - Only VS, TCS, TES, PS are cached, out of which only the hw VS
+ * variants of VS and TES are cached, so LS and ES aren't.
+ * - GS and CS aren't cached, but it's certainly possible to cache
+ * those as well.
+ */
+ pipe_mutex shader_cache_mutex;
+ struct hash_table *shader_cache;
};
struct si_blend_color {
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index baa1090e2fb..57458ae1381 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -70,6 +70,12 @@ struct si_shader_context
unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. */
bool is_gs_copy_shader;
+
+ /* Whether to generate the optimized shader variant compiled as a whole
+ * (without a prolog and epilog)
+ */
+ bool is_monolithic;
+
int param_streamout_config;
int param_streamout_write_index;
int param_streamout_offset[4];
@@ -77,6 +83,7 @@ struct si_shader_context
int param_rel_auto_id;
int param_vs_prim_id;
int param_instance_id;
+ int param_vertex_index0;
int param_tes_u;
int param_tes_v;
int param_tes_rel_patch_id;
@@ -96,14 +103,17 @@ struct si_shader_context
LLVMValueRef esgs_ring;
LLVMValueRef gsvs_ring[4];
LLVMValueRef gs_next_vertex[4];
+ LLVMValueRef return_value;
LLVMTypeRef voidt;
LLVMTypeRef i1;
LLVMTypeRef i8;
LLVMTypeRef i32;
+ LLVMTypeRef i64;
LLVMTypeRef i128;
LLVMTypeRef f32;
LLVMTypeRef v16i8;
+ LLVMTypeRef v2i32;
LLVMTypeRef v4i32;
LLVMTypeRef v4f32;
LLVMTypeRef v8i32;
@@ -118,9 +128,17 @@ static struct si_shader_context *si_shader_context(
static void si_init_shader_ctx(struct si_shader_context *ctx,
struct si_screen *sscreen,
struct si_shader *shader,
- LLVMTargetMachineRef tm,
- struct tgsi_shader_info *info);
+ LLVMTargetMachineRef tm);
+/* Ideally pass the sample mask input to the PS epilog as v13, which
+ * is its usual location, so that the shader doesn't have to add v_mov.
+ */
+#define PS_EPILOG_SAMPLEMASK_MIN_LOC 13
+
+/* The VS location of the PrimitiveID input is the same in the epilog,
+ * so that the main shader part doesn't have to move it.
+ */
+#define VS_EPILOG_PRIMID_LOC 2
#define PERSPECTIVE_BASE 0
#define LINEAR_BASE 9
@@ -196,6 +214,10 @@ static LLVMValueRef unpack_param(struct si_shader_context *ctx,
LLVMValueRef value = LLVMGetParam(ctx->radeon_bld.main_fn,
param);
+ if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMFloatTypeKind)
+ value = bitcast(&ctx->radeon_bld.soa.bld_base,
+ TGSI_TYPE_UNSIGNED, value);
+
if (rshift)
value = LLVMBuildLShr(gallivm->builder, value,
lp_build_const_int32(gallivm, rshift), "");
@@ -375,7 +397,7 @@ static LLVMValueRef build_indexed_load_const(
static LLVMValueRef get_instance_index_for_fetch(
struct radeon_llvm_context *radeon_bld,
- unsigned divisor)
+ unsigned param_start_instance, unsigned divisor)
{
struct si_shader_context *ctx =
si_shader_context(&radeon_bld->soa.bld_base);
@@ -389,8 +411,8 @@ static LLVMValueRef get_instance_index_for_fetch(
result = LLVMBuildUDiv(gallivm->builder, result,
lp_build_const_int32(gallivm, divisor), "");
- return LLVMBuildAdd(gallivm->builder, result, LLVMGetParam(
- radeon_bld->main_fn, SI_PARAM_START_INSTANCE), "");
+ return LLVMBuildAdd(gallivm->builder, result,
+ LLVMGetParam(radeon_bld->main_fn, param_start_instance), "");
}
static void declare_input_vs(
@@ -402,7 +424,8 @@ static void declare_input_vs(
struct gallivm_state *gallivm = base->gallivm;
struct si_shader_context *ctx =
si_shader_context(&radeon_bld->soa.bld_base);
- unsigned divisor = ctx->shader->key.vs.instance_divisors[input_index];
+ unsigned divisor =
+ ctx->shader->key.vs.prolog.instance_divisors[input_index];
unsigned chan;
@@ -424,10 +447,16 @@ static void declare_input_vs(
/* Build the attribute offset */
attribute_offset = lp_build_const_int32(gallivm, 0);
- if (divisor) {
+ if (!ctx->is_monolithic) {
+ buffer_index = LLVMGetParam(radeon_bld->main_fn,
+ ctx->param_vertex_index0 +
+ input_index);
+ } else if (divisor) {
/* Build index from instance ID, start instance and divisor */
- ctx->shader->uses_instanceid = true;
- buffer_index = get_instance_index_for_fetch(&ctx->radeon_bld, divisor);
+ ctx->shader->info.uses_instanceid = true;
+ buffer_index = get_instance_index_for_fetch(&ctx->radeon_bld,
+ SI_PARAM_START_INSTANCE,
+ divisor);
} else {
/* Load the buffer index for vertices. */
LLVMValueRef vertex_id = LLVMGetParam(ctx->radeon_bld.main_fn,
@@ -853,7 +882,8 @@ static int lookup_interp_param_index(unsigned interpolate, unsigned location)
static unsigned select_interp_param(struct si_shader_context *ctx,
unsigned param)
{
- if (!ctx->shader->key.ps.force_persample_interp)
+ if (!ctx->shader->key.ps.prolog.force_persample_interp ||
+ !ctx->is_monolithic)
return param;
/* If the shader doesn't use center/centroid, just return the parameter.
@@ -923,7 +953,7 @@ static void interp_fs_input(struct si_shader_context *ctx,
intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
if (semantic_name == TGSI_SEMANTIC_COLOR &&
- ctx->shader->key.ps.color_two_side) {
+ ctx->shader->key.ps.prolog.color_two_side) {
LLVMValueRef args[4];
LLVMValueRef is_face_positive;
LLVMValueRef back_attr_number;
@@ -997,6 +1027,7 @@ static void declare_input_fs(
unsigned input_index,
const struct tgsi_full_declaration *decl)
{
+ struct lp_build_context *base = &radeon_bld->soa.bld_base.base;
struct si_shader_context *ctx =
si_shader_context(&radeon_bld->soa.bld_base);
struct si_shader *shader = ctx->shader;
@@ -1004,6 +1035,26 @@ static void declare_input_fs(
LLVMValueRef interp_param = NULL;
int interp_param_idx;
+ /* Get colors from input VGPRs (set by the prolog). */
+ if (!ctx->is_monolithic &&
+ decl->Semantic.Name == TGSI_SEMANTIC_COLOR) {
+ unsigned i = decl->Semantic.Index;
+ unsigned colors_read = shader->selector->info.colors_read;
+ unsigned mask = colors_read >> (i * 4);
+ unsigned offset = SI_PARAM_POS_FIXED_PT + 1 +
+ (i ? util_bitcount(colors_read & 0xf) : 0);
+
+ radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 0)] =
+ mask & 0x1 ? LLVMGetParam(main_fn, offset++) : base->undef;
+ radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 1)] =
+ mask & 0x2 ? LLVMGetParam(main_fn, offset++) : base->undef;
+ radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 2)] =
+ mask & 0x4 ? LLVMGetParam(main_fn, offset++) : base->undef;
+ radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 3)] =
+ mask & 0x8 ? LLVMGetParam(main_fn, offset++) : base->undef;
+ return;
+ }
+
interp_param_idx = lookup_interp_param_index(decl->Interp.Interpolate,
decl->Interp.Location);
if (interp_param_idx == -1)
@@ -1330,12 +1381,12 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
const union si_shader_key *key = &ctx->shader->key;
- unsigned col_formats = key->ps.spi_shader_col_format;
+ unsigned col_formats = key->ps.epilog.spi_shader_col_format;
int cbuf = target - V_008DFC_SQ_EXP_MRT;
assert(cbuf >= 0 && cbuf < 8);
spi_shader_col_format = (col_formats >> (cbuf * 4)) & 0xf;
- is_int8 = (key->ps.color_is_int8 >> cbuf) & 0x1;
+ is_int8 = (key->ps.epilog.color_is_int8 >> cbuf) & 0x1;
}
args[4] = uint->zero; /* COMPR flag */
@@ -1488,13 +1539,13 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
- if (ctx->shader->key.ps.alpha_func != PIPE_FUNC_NEVER) {
+ if (ctx->shader->key.ps.epilog.alpha_func != PIPE_FUNC_NEVER) {
LLVMValueRef alpha_ref = LLVMGetParam(ctx->radeon_bld.main_fn,
SI_PARAM_ALPHA_REF);
LLVMValueRef alpha_pass =
lp_build_cmp(&bld_base->base,
- ctx->shader->key.ps.alpha_func,
+ ctx->shader->key.ps.epilog.alpha_func,
alpha, alpha_ref);
LLVMValueRef arg =
lp_build_select(&bld_base->base,
@@ -1511,7 +1562,8 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
}
static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base,
- LLVMValueRef alpha)
+ LLVMValueRef alpha,
+ unsigned samplemask_param)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
@@ -1519,7 +1571,7 @@ static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *
/* alpha = alpha * popcount(coverage) / SI_NUM_SMOOTH_AA_SAMPLES */
coverage = LLVMGetParam(ctx->radeon_bld.main_fn,
- SI_PARAM_SAMPLE_COVERAGE);
+ samplemask_param);
coverage = bitcast(bld_base, TGSI_TYPE_SIGNED, coverage);
coverage = lp_build_intrinsic(gallivm->builder, "llvm.ctpop.i32",
@@ -1841,7 +1893,8 @@ handle_semantic:
case TGSI_SEMANTIC_COLOR:
case TGSI_SEMANTIC_BCOLOR:
target = V_008DFC_SQ_EXP_PARAM + param_count;
- shader->vs_output_param_offset[i] = param_count;
+ assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset));
+ shader->info.vs_output_param_offset[i] = param_count;
param_count++;
break;
case TGSI_SEMANTIC_CLIPDIST:
@@ -1855,7 +1908,8 @@ handle_semantic:
case TGSI_SEMANTIC_TEXCOORD:
case TGSI_SEMANTIC_GENERIC:
target = V_008DFC_SQ_EXP_PARAM + param_count;
- shader->vs_output_param_offset[i] = param_count;
+ assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset));
+ shader->info.vs_output_param_offset[i] = param_count;
param_count++;
break;
default:
@@ -1883,7 +1937,7 @@ handle_semantic:
}
}
- shader->nr_param_exports = param_count;
+ shader->info.nr_param_exports = param_count;
/* We need to add the position output manually if it's missing. */
if (!pos_args[0][0]) {
@@ -1945,7 +1999,7 @@ handle_semantic:
for (i = 0; i < 4; i++)
if (pos_args[i][0])
- shader->nr_pos_exports++;
+ shader->info.nr_pos_exports++;
pos_idx = 0;
for (i = 0; i < 4; i++) {
@@ -1955,7 +2009,7 @@ handle_semantic:
/* Specify the target we are exporting */
pos_args[i][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + pos_idx++);
- if (pos_idx == shader->nr_pos_exports)
+ if (pos_idx == shader->info.nr_pos_exports)
/* Specify that this is the last export */
pos_args[i][2] = uint->one;
@@ -1989,7 +2043,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
invocation_id, bld_base->uint_bld.zero, ""));
/* Determine the layout of one tess factor element in the buffer. */
- switch (shader->key.tcs.prim_mode) {
+ switch (shader->key.tcs.epilog.prim_mode) {
case PIPE_PRIM_LINES:
stride = 2; /* 2 dwords, 1 vec2 store */
outer_comps = 2;
@@ -2061,14 +2115,51 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMValueRef invocation_id;
+ LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;
+ rel_patch_id = get_rel_patch_id(ctx);
invocation_id = unpack_param(ctx, SI_PARAM_REL_IDS, 8, 5);
+ tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);
- si_write_tess_factors(bld_base,
- get_rel_patch_id(ctx),
- invocation_id,
- get_tcs_out_current_patch_data_offset(ctx));
+ if (!ctx->is_monolithic) {
+ /* Return epilog parameters from this function. */
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ LLVMValueRef ret = ctx->return_value;
+ LLVMValueRef rw_buffers, rw0, rw1, tf_soffset;
+ unsigned vgpr;
+
+ /* RW_BUFFERS pointer */
+ rw_buffers = LLVMGetParam(ctx->radeon_bld.main_fn,
+ SI_PARAM_RW_BUFFERS);
+ rw_buffers = LLVMBuildPtrToInt(builder, rw_buffers, ctx->i64, "");
+ rw_buffers = LLVMBuildBitCast(builder, rw_buffers, ctx->v2i32, "");
+ rw0 = LLVMBuildExtractElement(builder, rw_buffers,
+ bld_base->uint_bld.zero, "");
+ rw1 = LLVMBuildExtractElement(builder, rw_buffers,
+ bld_base->uint_bld.one, "");
+ ret = LLVMBuildInsertValue(builder, ret, rw0, 0, "");
+ ret = LLVMBuildInsertValue(builder, ret, rw1, 1, "");
+
+ /* Tess factor buffer soffset is after user SGPRs. */
+ tf_soffset = LLVMGetParam(ctx->radeon_bld.main_fn,
+ SI_PARAM_TESS_FACTOR_OFFSET);
+ ret = LLVMBuildInsertValue(builder, ret, tf_soffset,
+ SI_TCS_NUM_USER_SGPR, "");
+
+ /* VGPRs */
+ rel_patch_id = bitcast(bld_base, TGSI_TYPE_FLOAT, rel_patch_id);
+ invocation_id = bitcast(bld_base, TGSI_TYPE_FLOAT, invocation_id);
+ tf_lds_offset = bitcast(bld_base, TGSI_TYPE_FLOAT, tf_lds_offset);
+
+ vgpr = SI_TCS_NUM_USER_SGPR + 1;
+ ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, "");
+ ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, "");
+ ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, "");
+ ctx->return_value = ret;
+ return;
+ }
+
+ si_write_tess_factors(bld_base, rel_patch_id, invocation_id, tf_lds_offset);
}
static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context *bld_base)
@@ -2214,16 +2305,26 @@ static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base)
"");
}
- /* Export PrimitiveID when PS needs it. */
- if (si_vs_exports_prim_id(ctx->shader)) {
- outputs[i].name = TGSI_SEMANTIC_PRIMID;
- outputs[i].sid = 0;
- outputs[i].values[0] = bitcast(bld_base, TGSI_TYPE_FLOAT,
- get_primitive_id(bld_base, 0));
- outputs[i].values[1] = bld_base->base.undef;
- outputs[i].values[2] = bld_base->base.undef;
- outputs[i].values[3] = bld_base->base.undef;
- i++;
+ if (ctx->is_monolithic) {
+ /* Export PrimitiveID when PS needs it. */
+ if (si_vs_exports_prim_id(ctx->shader)) {
+ outputs[i].name = TGSI_SEMANTIC_PRIMID;
+ outputs[i].sid = 0;
+ outputs[i].values[0] = bitcast(bld_base, TGSI_TYPE_FLOAT,
+ get_primitive_id(bld_base, 0));
+ outputs[i].values[1] = bld_base->base.undef;
+ outputs[i].values[2] = bld_base->base.undef;
+ outputs[i].values[3] = bld_base->base.undef;
+ i++;
+ }
+ } else {
+ /* Return the primitive ID from the LLVM function. */
+ ctx->return_value =
+ LLVMBuildInsertValue(gallivm->builder,
+ ctx->return_value,
+ bitcast(bld_base, TGSI_TYPE_FLOAT,
+ get_primitive_id(bld_base, 0)),
+ VS_EPILOG_PRIMID_LOC, "");
}
si_llvm_export_vs(bld_base, outputs, i);
@@ -2284,6 +2385,7 @@ static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base,
static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
LLVMValueRef *color, unsigned index,
+ unsigned samplemask_param,
bool is_last)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
@@ -2291,30 +2393,31 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
int i;
/* Clamp color */
- if (ctx->shader->key.ps.clamp_color)
+ if (ctx->shader->key.ps.epilog.clamp_color)
for (i = 0; i < 4; i++)
color[i] = radeon_llvm_saturate(bld_base, color[i]);
/* Alpha to one */
- if (ctx->shader->key.ps.alpha_to_one)
+ if (ctx->shader->key.ps.epilog.alpha_to_one)
color[3] = base->one;
/* Alpha test */
if (index == 0 &&
- ctx->shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS)
+ ctx->shader->key.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS)
si_alpha_test(bld_base, color[3]);
/* Line & polygon smoothing */
- if (ctx->shader->key.ps.poly_line_smoothing)
- color[3] = si_scale_alpha_by_sample_mask(bld_base, color[3]);
+ if (ctx->shader->key.ps.epilog.poly_line_smoothing)
+ color[3] = si_scale_alpha_by_sample_mask(bld_base, color[3],
+ samplemask_param);
/* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
- if (ctx->shader->key.ps.last_cbuf > 0) {
+ if (ctx->shader->key.ps.epilog.last_cbuf > 0) {
LLVMValueRef args[8][9];
int c, last = -1;
/* Get the export arguments, also find out what the last one is. */
- for (c = 0; c <= ctx->shader->key.ps.last_cbuf; c++) {
+ for (c = 0; c <= ctx->shader->key.ps.epilog.last_cbuf; c++) {
si_llvm_init_export_args(bld_base, color,
V_008DFC_SQ_EXP_MRT + c, args[c]);
if (args[c][0] != bld_base->uint_bld.zero)
@@ -2322,7 +2425,7 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
}
/* Emit all exports. */
- for (c = 0; c <= ctx->shader->key.ps.last_cbuf; c++) {
+ for (c = 0; c <= ctx->shader->key.ps.epilog.last_cbuf; c++) {
if (is_last && last == c) {
args[c][1] = bld_base->uint_bld.one; /* whether the EXEC mask is valid */
args[c][2] = bld_base->uint_bld.one; /* DONE bit */
@@ -2385,11 +2488,11 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base)
* Otherwise, find the last color export.
*/
if (!info->writes_z && !info->writes_stencil && !info->writes_samplemask) {
- unsigned spi_format = shader->key.ps.spi_shader_col_format;
+ unsigned spi_format = shader->key.ps.epilog.spi_shader_col_format;
/* Don't export NULL and return if alpha-test is enabled. */
- if (shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS &&
- shader->key.ps.alpha_func != PIPE_FUNC_NEVER &&
+ if (shader->key.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS &&
+ shader->key.ps.epilog.alpha_func != PIPE_FUNC_NEVER &&
(spi_format & 0xf) == 0)
spi_format |= V_028714_SPI_SHADER_32_AR;
@@ -2400,10 +2503,10 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base)
continue;
/* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
- if (shader->key.ps.last_cbuf > 0) {
+ if (shader->key.ps.epilog.last_cbuf > 0) {
/* Just set this if any of the colorbuffers are enabled. */
if (spi_format &
- ((1llu << (4 * (shader->key.ps.last_cbuf + 1))) - 1))
+ ((1llu << (4 * (shader->key.ps.epilog.last_cbuf + 1))) - 1))
last_color_export = i;
continue;
}
@@ -2445,6 +2548,7 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base)
ctx->radeon_bld.soa.outputs[i][j], "");
si_export_mrt_color(bld_base, color, semantic_index,
+ SI_PARAM_SAMPLE_COVERAGE,
last_color_export == i);
break;
default:
@@ -2458,6 +2562,100 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base)
si_export_mrt_z(bld_base, depth, stencil, samplemask);
}
+/**
+ * Return PS outputs in this order:
+ *
+ * v[0:3] = color0.xyzw
+ * v[4:7] = color1.xyzw
+ * ...
+ * vN+0 = Depth
+ * vN+1 = Stencil
+ * vN+2 = SampleMask
+ * vN+3 = SampleMaskIn (used for OpenGL smoothing)
+ *
+ * The alpha-ref SGPR is returned via its original location.
+ */
+static void si_llvm_return_fs_outputs(struct lp_build_tgsi_context *bld_base)
+{
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ struct si_shader *shader = ctx->shader;
+ struct lp_build_context *base = &bld_base->base;
+ struct tgsi_shader_info *info = &shader->selector->info;
+ LLVMBuilderRef builder = base->gallivm->builder;
+ unsigned i, j, first_vgpr, vgpr;
+
+ LLVMValueRef color[8][4] = {};
+ LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
+ LLVMValueRef ret;
+
+ /* Read the output values. */
+ for (i = 0; i < info->num_outputs; i++) {
+ unsigned semantic_name = info->output_semantic_name[i];
+ unsigned semantic_index = info->output_semantic_index[i];
+
+ switch (semantic_name) {
+ case TGSI_SEMANTIC_COLOR:
+ assert(semantic_index < 8);
+ for (j = 0; j < 4; j++) {
+ LLVMValueRef ptr = ctx->radeon_bld.soa.outputs[i][j];
+ LLVMValueRef result = LLVMBuildLoad(builder, ptr, "");
+ color[semantic_index][j] = result;
+ }
+ break;
+ case TGSI_SEMANTIC_POSITION:
+ depth = LLVMBuildLoad(builder,
+ ctx->radeon_bld.soa.outputs[i][2], "");
+ break;
+ case TGSI_SEMANTIC_STENCIL:
+ stencil = LLVMBuildLoad(builder,
+ ctx->radeon_bld.soa.outputs[i][1], "");
+ break;
+ case TGSI_SEMANTIC_SAMPLEMASK:
+ samplemask = LLVMBuildLoad(builder,
+ ctx->radeon_bld.soa.outputs[i][0], "");
+ break;
+ default:
+ fprintf(stderr, "Warning: SI unhandled fs output type:%d\n",
+ semantic_name);
+ }
+ }
+
+ /* Fill the return structure. */
+ ret = ctx->return_value;
+
+ /* Set SGPRs. */
+ ret = LLVMBuildInsertValue(builder, ret,
+ bitcast(bld_base, TGSI_TYPE_SIGNED,
+ LLVMGetParam(ctx->radeon_bld.main_fn,
+ SI_PARAM_ALPHA_REF)),
+ SI_SGPR_ALPHA_REF, "");
+
+ /* Set VGPRs */
+ first_vgpr = vgpr = SI_SGPR_ALPHA_REF + 1;
+ for (i = 0; i < ARRAY_SIZE(color); i++) {
+ if (!color[i][0])
+ continue;
+
+ for (j = 0; j < 4; j++)
+ ret = LLVMBuildInsertValue(builder, ret, color[i][j], vgpr++, "");
+ }
+ if (depth)
+ ret = LLVMBuildInsertValue(builder, ret, depth, vgpr++, "");
+ if (stencil)
+ ret = LLVMBuildInsertValue(builder, ret, stencil, vgpr++, "");
+ if (samplemask)
+ ret = LLVMBuildInsertValue(builder, ret, samplemask, vgpr++, "");
+
+ /* Add the input sample mask for smoothing at the end. */
+ if (vgpr < first_vgpr + PS_EPILOG_SAMPLEMASK_MIN_LOC)
+ vgpr = first_vgpr + PS_EPILOG_SAMPLEMASK_MIN_LOC;
+ ret = LLVMBuildInsertValue(builder, ret,
+ LLVMGetParam(ctx->radeon_bld.main_fn,
+ SI_PARAM_SAMPLE_COVERAGE), vgpr++, "");
+
+ ctx->return_value = ret;
+}
+
static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data);
@@ -2536,13 +2734,12 @@ static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
/**
* Load an image view, fmask view. or sampler state descriptor.
*/
-static LLVMValueRef get_sampler_desc(struct si_shader_context *ctx,
- LLVMValueRef index, enum desc_type type)
+static LLVMValueRef get_sampler_desc_custom(struct si_shader_context *ctx,
+ LLVMValueRef list, LLVMValueRef index,
+ enum desc_type type)
{
struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
LLVMBuilderRef builder = gallivm->builder;
- LLVMValueRef ptr = LLVMGetParam(ctx->radeon_bld.main_fn,
- SI_PARAM_SAMPLERS);
switch (type) {
case DESC_IMAGE:
@@ -2558,12 +2755,21 @@ static LLVMValueRef get_sampler_desc(struct si_shader_context *ctx,
/* The sampler state is at [12:15]. */
index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 4, 0), "");
index = LLVMBuildAdd(builder, index, LLVMConstInt(ctx->i32, 3, 0), "");
- ptr = LLVMBuildPointerCast(builder, ptr,
- const_array(ctx->v4i32, 0), "");
+ list = LLVMBuildPointerCast(builder, list,
+ const_array(ctx->v4i32, 0), "");
break;
}
- return build_indexed_load_const(ctx, ptr, index);
+ return build_indexed_load_const(ctx, list, index);
+}
+
+static LLVMValueRef get_sampler_desc(struct si_shader_context *ctx,
+ LLVMValueRef index, enum desc_type type)
+{
+ LLVMValueRef list = LLVMGetParam(ctx->radeon_bld.main_fn,
+ SI_PARAM_SAMPLERS);
+
+ return get_sampler_desc_custom(ctx, list, index, type);
}
static void tex_fetch_ptrs(
@@ -3546,6 +3752,30 @@ static const struct lp_build_tgsi_action interp_action = {
.emit = build_interp_intrinsic,
};
+static void si_create_function(struct si_shader_context *ctx,
+ LLVMTypeRef *returns, unsigned num_returns,
+ LLVMTypeRef *params, unsigned num_params,
+ int last_array_pointer, int last_sgpr)
+{
+ int i;
+
+ radeon_llvm_create_func(&ctx->radeon_bld, returns, num_returns,
+ params, num_params);
+ radeon_llvm_shader_type(ctx->radeon_bld.main_fn, ctx->type);
+ ctx->return_value = LLVMGetUndef(ctx->radeon_bld.return_type);
+
+ for (i = 0; i <= last_sgpr; ++i) {
+ LLVMValueRef P = LLVMGetParam(ctx->radeon_bld.main_fn, i);
+
+ /* We tell llvm that array inputs are passed by value to allow Sinking pass
+ * to move load. Inputs are constant so this is fine. */
+ if (i <= last_array_pointer)
+ LLVMAddAttribute(P, LLVMByValAttribute);
+ else
+ LLVMAddAttribute(P, LLVMInRegAttribute);
+ }
+}
+
static void create_meta_data(struct si_shader_context *ctx)
{
struct gallivm_state *gallivm = ctx->radeon_bld.soa.bld_base.base.gallivm;
@@ -3579,15 +3809,57 @@ static void declare_streamout_params(struct si_shader_context *ctx,
}
}
+static unsigned llvm_get_type_size(LLVMTypeRef type)
+{
+ LLVMTypeKind kind = LLVMGetTypeKind(type);
+
+ switch (kind) {
+ case LLVMIntegerTypeKind:
+ return LLVMGetIntTypeWidth(type) / 8;
+ case LLVMFloatTypeKind:
+ return 4;
+ case LLVMPointerTypeKind:
+ return 8;
+ case LLVMVectorTypeKind:
+ return LLVMGetVectorSize(type) *
+ llvm_get_type_size(LLVMGetElementType(type));
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+static void declare_tess_lds(struct si_shader_context *ctx)
+{
+ struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
+ LLVMTypeRef i32 = ctx->radeon_bld.soa.bld_base.uint_bld.elem_type;
+
+ /* This is the upper bound, maximum is 32 inputs times 32 vertices */
+ unsigned vertex_data_dw_size = 32*32*4;
+ unsigned patch_data_dw_size = 32*4;
+ /* The formula is: TCS inputs + TCS outputs + TCS patch outputs. */
+ unsigned patch_dw_size = vertex_data_dw_size*2 + patch_data_dw_size;
+ unsigned lds_dwords = patch_dw_size;
+
+ /* The actual size is computed outside of the shader to reduce
+ * the number of shader variants. */
+ ctx->lds =
+ LLVMAddGlobalInAddressSpace(gallivm->module,
+ LLVMArrayType(i32, lds_dwords),
+ "tess_lds",
+ LOCAL_ADDR_SPACE);
+}
+
static void create_function(struct si_shader_context *ctx)
{
struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
struct gallivm_state *gallivm = bld_base->base.gallivm;
struct si_shader *shader = ctx->shader;
- LLVMTypeRef params[SI_NUM_PARAMS], v2i32, v3i32;
- unsigned i, last_array_pointer, last_sgpr, num_params;
+ LLVMTypeRef params[SI_NUM_PARAMS + SI_NUM_VERTEX_BUFFERS], v3i32;
+ LLVMTypeRef returns[16+32*4];
+ unsigned i, last_array_pointer, last_sgpr, num_params, num_return_sgprs;
+ unsigned num_returns = 0;
- v2i32 = LLVMVectorType(ctx->i32, 2);
v3i32 = LLVMVectorType(ctx->i32, 3);
params[SI_PARAM_RW_BUFFERS] = const_array(ctx->v16i8, SI_NUM_RW_BUFFERS);
@@ -3630,6 +3902,20 @@ static void create_function(struct si_shader_context *ctx)
params[ctx->param_rel_auto_id = num_params++] = ctx->i32;
params[ctx->param_vs_prim_id = num_params++] = ctx->i32;
params[ctx->param_instance_id = num_params++] = ctx->i32;
+
+ if (!ctx->is_monolithic &&
+ !ctx->is_gs_copy_shader) {
+ /* Vertex load indices. */
+ ctx->param_vertex_index0 = num_params;
+
+ for (i = 0; i < shader->selector->info.num_inputs; i++)
+ params[num_params++] = ctx->i32;
+
+ /* PrimitiveID output. */
+ if (!shader->key.vs.as_es && !shader->key.vs.as_ls)
+ for (i = 0; i <= VS_EPILOG_PRIMID_LOC; i++)
+ returns[num_returns++] = ctx->f32;
+ }
break;
case TGSI_PROCESSOR_TESS_CTRL:
@@ -3643,6 +3929,15 @@ static void create_function(struct si_shader_context *ctx)
params[SI_PARAM_PATCH_ID] = ctx->i32;
params[SI_PARAM_REL_IDS] = ctx->i32;
num_params = SI_PARAM_REL_IDS+1;
+
+ if (!ctx->is_monolithic) {
+ /* PARAM_TESS_FACTOR_OFFSET is after user SGPRs. */
+ for (i = 0; i <= SI_TCS_NUM_USER_SGPR; i++)
+ returns[num_returns++] = ctx->i32; /* SGPRs */
+
+ for (i = 0; i < 3; i++)
+ returns[num_returns++] = ctx->f32; /* VGPRs */
+ }
break;
case TGSI_PROCESSOR_TESS_EVAL:
@@ -3663,6 +3958,11 @@ static void create_function(struct si_shader_context *ctx)
params[ctx->param_tes_v = num_params++] = ctx->f32;
params[ctx->param_tes_rel_patch_id = num_params++] = ctx->i32;
params[ctx->param_tes_patch_id = num_params++] = ctx->i32;
+
+ /* PrimitiveID output. */
+ if (!ctx->is_monolithic && !shader->key.tes.as_es)
+ for (i = 0; i <= VS_EPILOG_PRIMID_LOC; i++)
+ returns[num_returns++] = ctx->f32;
break;
case TGSI_PROCESSOR_GEOMETRY:
@@ -3686,13 +3986,13 @@ static void create_function(struct si_shader_context *ctx)
params[SI_PARAM_ALPHA_REF] = ctx->f32;
params[SI_PARAM_PRIM_MASK] = ctx->i32;
last_sgpr = SI_PARAM_PRIM_MASK;
- params[SI_PARAM_PERSP_SAMPLE] = v2i32;
- params[SI_PARAM_PERSP_CENTER] = v2i32;
- params[SI_PARAM_PERSP_CENTROID] = v2i32;
+ params[SI_PARAM_PERSP_SAMPLE] = ctx->v2i32;
+ params[SI_PARAM_PERSP_CENTER] = ctx->v2i32;
+ params[SI_PARAM_PERSP_CENTROID] = ctx->v2i32;
params[SI_PARAM_PERSP_PULL_MODEL] = v3i32;
- params[SI_PARAM_LINEAR_SAMPLE] = v2i32;
- params[SI_PARAM_LINEAR_CENTER] = v2i32;
- params[SI_PARAM_LINEAR_CENTROID] = v2i32;
+ params[SI_PARAM_LINEAR_SAMPLE] = ctx->v2i32;
+ params[SI_PARAM_LINEAR_CENTER] = ctx->v2i32;
+ params[SI_PARAM_LINEAR_CENTROID] = ctx->v2i32;
params[SI_PARAM_LINE_STIPPLE_TEX] = ctx->f32;
params[SI_PARAM_POS_X_FLOAT] = ctx->f32;
params[SI_PARAM_POS_Y_FLOAT] = ctx->f32;
@@ -3701,8 +4001,39 @@ static void create_function(struct si_shader_context *ctx)
params[SI_PARAM_FRONT_FACE] = ctx->i32;
params[SI_PARAM_ANCILLARY] = ctx->i32;
params[SI_PARAM_SAMPLE_COVERAGE] = ctx->f32;
- params[SI_PARAM_POS_FIXED_PT] = ctx->f32;
+ params[SI_PARAM_POS_FIXED_PT] = ctx->i32;
num_params = SI_PARAM_POS_FIXED_PT+1;
+
+ if (!ctx->is_monolithic) {
+ /* Color inputs from the prolog. */
+ if (shader->selector->info.colors_read) {
+ unsigned num_color_elements =
+ util_bitcount(shader->selector->info.colors_read);
+
+ assert(num_params + num_color_elements <= ARRAY_SIZE(params));
+ for (i = 0; i < num_color_elements; i++)
+ params[num_params++] = ctx->f32;
+ }
+
+ /* Outputs for the epilog. */
+ num_return_sgprs = SI_SGPR_ALPHA_REF + 1;
+ num_returns =
+ num_return_sgprs +
+ util_bitcount(shader->selector->info.colors_written) * 4 +
+ shader->selector->info.writes_z +
+ shader->selector->info.writes_stencil +
+ shader->selector->info.writes_samplemask +
+ 1 /* SampleMaskIn */;
+
+ num_returns = MAX2(num_returns,
+ num_return_sgprs +
+ PS_EPILOG_SAMPLEMASK_MIN_LOC + 1);
+
+ for (i = 0; i < num_return_sgprs; i++)
+ returns[i] = ctx->i32;
+ for (; i < num_returns; i++)
+ returns[i] = ctx->f32;
+ }
break;
default:
@@ -3711,20 +4042,38 @@ static void create_function(struct si_shader_context *ctx)
}
assert(num_params <= Elements(params));
- radeon_llvm_create_func(&ctx->radeon_bld, params, num_params);
- radeon_llvm_shader_type(ctx->radeon_bld.main_fn, ctx->type);
-
- for (i = 0; i <= last_sgpr; ++i) {
- LLVMValueRef P = LLVMGetParam(ctx->radeon_bld.main_fn, i);
- /* We tell llvm that array inputs are passed by value to allow Sinking pass
- * to move load. Inputs are constant so this is fine. */
- if (i <= last_array_pointer)
- LLVMAddAttribute(P, LLVMByValAttribute);
- else
- LLVMAddAttribute(P, LLVMInRegAttribute);
+ si_create_function(ctx, returns, num_returns, params,
+ num_params, last_array_pointer, last_sgpr);
+
+ /* Reserve register locations for VGPR inputs the PS prolog may need. */
+ if (ctx->type == TGSI_PROCESSOR_FRAGMENT &&
+ !ctx->is_monolithic) {
+ radeon_llvm_add_attribute(ctx->radeon_bld.main_fn,
+ "InitialPSInputAddr",
+ S_0286D0_PERSP_SAMPLE_ENA(1) |
+ S_0286D0_PERSP_CENTER_ENA(1) |
+ S_0286D0_PERSP_CENTROID_ENA(1) |
+ S_0286D0_LINEAR_SAMPLE_ENA(1) |
+ S_0286D0_LINEAR_CENTER_ENA(1) |
+ S_0286D0_LINEAR_CENTROID_ENA(1) |
+ S_0286D0_FRONT_FACE_ENA(1) |
+ S_0286D0_POS_FIXED_PT_ENA(1));
}
+ shader->info.num_input_sgprs = 0;
+ shader->info.num_input_vgprs = 0;
+
+ for (i = 0; i <= last_sgpr; ++i)
+ shader->info.num_input_sgprs += llvm_get_type_size(params[i]) / 4;
+
+ /* Unused fragment shader inputs are eliminated by the compiler,
+ * so we don't know yet how many there will be.
+ */
+ if (ctx->type != TGSI_PROCESSOR_FRAGMENT)
+ for (; i < num_params; ++i)
+ shader->info.num_input_vgprs += llvm_get_type_size(params[i]) / 4;
+
if (bld_base->info &&
(bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0 ||
@@ -3740,22 +4089,8 @@ static void create_function(struct si_shader_context *ctx)
if ((ctx->type == TGSI_PROCESSOR_VERTEX && shader->key.vs.as_ls) ||
ctx->type == TGSI_PROCESSOR_TESS_CTRL ||
- ctx->type == TGSI_PROCESSOR_TESS_EVAL) {
- /* This is the upper bound, maximum is 32 inputs times 32 vertices */
- unsigned vertex_data_dw_size = 32*32*4;
- unsigned patch_data_dw_size = 32*4;
- /* The formula is: TCS inputs + TCS outputs + TCS patch outputs. */
- unsigned patch_dw_size = vertex_data_dw_size*2 + patch_data_dw_size;
- unsigned lds_dwords = patch_dw_size;
-
- /* The actual size is computed outside of the shader to reduce
- * the number of shader variants. */
- ctx->lds =
- LLVMAddGlobalInAddressSpace(gallivm->module,
- LLVMArrayType(ctx->i32, lds_dwords),
- "tess_lds",
- LOCAL_ADDR_SPACE);
- }
+ ctx->type == TGSI_PROCESSOR_TESS_EVAL)
+ declare_tess_lds(ctx);
}
static void preload_constants(struct si_shader_context *ctx)
@@ -3887,6 +4222,49 @@ static void preload_ring_buffers(struct si_shader_context *ctx)
}
}
+static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,
+ LLVMValueRef param_sampler_views,
+ unsigned param_pos_fixed_pt)
+{
+ struct lp_build_tgsi_context *bld_base =
+ &ctx->radeon_bld.soa.bld_base;
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ struct lp_build_emit_data result = {};
+ struct tgsi_full_instruction inst = {};
+ LLVMValueRef desc, sampler_index, address[2], pix;
+
+ /* Use the fixed-point gl_FragCoord input.
+ * Since the stipple pattern is 32x32 and it repeats, just get 5 bits
+ * per coordinate to get the repeating effect.
+ */
+ address[0] = unpack_param(ctx, param_pos_fixed_pt, 0, 5);
+ address[1] = unpack_param(ctx, param_pos_fixed_pt, 16, 5);
+
+ /* Load the sampler view descriptor. */
+ sampler_index = lp_build_const_int32(gallivm, SI_POLY_STIPPLE_SAMPLER);
+ desc = get_sampler_desc_custom(ctx, param_sampler_views,
+ sampler_index, DESC_IMAGE);
+
+ /* Load the texel. */
+ inst.Instruction.Opcode = TGSI_OPCODE_TXF;
+ inst.Texture.Texture = TGSI_TEXTURE_2D_MSAA; /* = use load, not load_mip */
+ result.inst = &inst;
+ set_tex_fetch_args(ctx, &result, TGSI_OPCODE_TXF,
+ inst.Texture.Texture,
+ desc, NULL, address, ARRAY_SIZE(address), 0xf);
+ build_tex_intrinsic(&tex_action, bld_base, &result);
+
+ /* Kill the thread accordingly. */
+ pix = LLVMBuildExtractElement(gallivm->builder, result.output[0],
+ lp_build_const_int32(gallivm, 3), "");
+ pix = bitcast(bld_base, TGSI_TYPE_FLOAT, pix);
+ pix = LLVMBuildFNeg(gallivm->builder, pix, "");
+
+ lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
+ LLVMVoidTypeInContext(gallivm->context),
+ &pix, 1, 0);
+}
+
void si_shader_binary_read_config(struct radeon_shader_binary *binary,
struct si_shader_config *conf,
unsigned symbol_offset)
@@ -3972,41 +4350,70 @@ void si_shader_apply_scratch_relocs(struct si_context *sctx,
}
}
+static unsigned si_get_shader_binary_size(struct si_shader *shader)
+{
+ unsigned size = shader->binary.code_size;
+
+ if (shader->prolog)
+ size += shader->prolog->binary.code_size;
+ if (shader->epilog)
+ size += shader->epilog->binary.code_size;
+ return size;
+}
+
int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
{
- const struct radeon_shader_binary *binary = &shader->binary;
- unsigned code_size = binary->code_size + binary->rodata_size;
+ const struct radeon_shader_binary *prolog =
+ shader->prolog ? &shader->prolog->binary : NULL;
+ const struct radeon_shader_binary *epilog =
+ shader->epilog ? &shader->epilog->binary : NULL;
+ const struct radeon_shader_binary *mainb = &shader->binary;
+ unsigned bo_size = si_get_shader_binary_size(shader) +
+ (!epilog ? mainb->rodata_size : 0);
unsigned char *ptr;
+ assert(!prolog || !prolog->rodata_size);
+ assert((!prolog && !epilog) || !mainb->rodata_size);
+ assert(!epilog || !epilog->rodata_size);
+
r600_resource_reference(&shader->bo, NULL);
shader->bo = si_resource_create_custom(&sscreen->b.b,
PIPE_USAGE_IMMUTABLE,
- code_size);
+ bo_size);
if (!shader->bo)
return -ENOMEM;
+ /* Upload. */
ptr = sscreen->b.ws->buffer_map(shader->bo->buf, NULL,
PIPE_TRANSFER_READ_WRITE);
- util_memcpy_cpu_to_le32(ptr, binary->code, binary->code_size);
- if (binary->rodata_size > 0) {
- ptr += binary->code_size;
- util_memcpy_cpu_to_le32(ptr, binary->rodata,
- binary->rodata_size);
+
+ if (prolog) {
+ util_memcpy_cpu_to_le32(ptr, prolog->code, prolog->code_size);
+ ptr += prolog->code_size;
}
+ util_memcpy_cpu_to_le32(ptr, mainb->code, mainb->code_size);
+ ptr += mainb->code_size;
+
+ if (epilog)
+ util_memcpy_cpu_to_le32(ptr, epilog->code, epilog->code_size);
+ else if (mainb->rodata_size > 0)
+ util_memcpy_cpu_to_le32(ptr, mainb->rodata, mainb->rodata_size);
+
sscreen->b.ws->buffer_unmap(shader->bo->buf);
return 0;
}
static void si_shader_dump_disassembly(const struct radeon_shader_binary *binary,
- struct pipe_debug_callback *debug)
+ struct pipe_debug_callback *debug,
+ const char *name)
{
char *line, *p;
unsigned i, count;
if (binary->disasm_string) {
- fprintf(stderr, "\nShader Disassembly:\n\n");
- fprintf(stderr, "%s\n", binary->disasm_string);
+ fprintf(stderr, "Shader %s disassembly:\n", name);
+ fprintf(stderr, "%s", binary->disasm_string);
if (debug && debug->debug_message) {
/* Very long debug messages are cut off, so send the
@@ -4036,7 +4443,7 @@ static void si_shader_dump_disassembly(const struct radeon_shader_binary *binary
"Shader Disassembly End");
}
} else {
- fprintf(stderr, "SI CODE:\n");
+ fprintf(stderr, "Shader %s binary:\n", name);
for (i = 0; i < binary->code_size; i += 4) {
fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i,
binary->code[i + 3], binary->code[i + 2],
@@ -4115,16 +4522,60 @@ static void si_shader_dump_stats(struct si_screen *sscreen,
max_simd_waves);
}
+/**
+ * Return a human-readable name for a shader.
+ *
+ * \param shader     shader whose key selects the "as ES/LS/VS" variant
+ * \param processor  TGSI_PROCESSOR_* type of the shader
+ * \return a string literal; "Unknown Shader" for unrecognized processors
+ */
+static const char *si_get_shader_name(struct si_shader *shader,
+				      unsigned processor)
+{
+	switch (processor) {
+	case TGSI_PROCESSOR_VERTEX:
+		if (shader->key.vs.as_es)
+			return "Vertex Shader as ES";
+		if (shader->key.vs.as_ls)
+			return "Vertex Shader as LS";
+		return "Vertex Shader as VS";
+
+	case TGSI_PROCESSOR_TESS_CTRL:
+		return "Tessellation Control Shader";
+
+	case TGSI_PROCESSOR_TESS_EVAL:
+		return shader->key.tes.as_es ?
+			"Tessellation Evaluation Shader as ES" :
+			"Tessellation Evaluation Shader as VS";
+
+	case TGSI_PROCESSOR_GEOMETRY:
+		/* A shader with no copy shader attached is reported as being
+		 * the copy shader. */
+		return shader->gs_copy_shader ?
+			"Geometry Shader" : "GS Copy Shader as VS";
+
+	case TGSI_PROCESSOR_FRAGMENT:
+		return "Pixel Shader";
+
+	case TGSI_PROCESSOR_COMPUTE:
+		return "Compute Shader";
+
+	default:
+		return "Unknown Shader";
+	}
+}
+
void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
struct pipe_debug_callback *debug, unsigned processor)
{
- if (r600_can_dump_shader(&sscreen->b, processor))
- if (!(sscreen->b.debug_flags & DBG_NO_ASM))
- si_shader_dump_disassembly(&shader->binary, debug);
+ if (r600_can_dump_shader(&sscreen->b, processor) &&
+ !(sscreen->b.debug_flags & DBG_NO_ASM)) {
+ fprintf(stderr, "\n%s:\n", si_get_shader_name(shader, processor));
+
+ if (shader->prolog)
+ si_shader_dump_disassembly(&shader->prolog->binary,
+ debug, "prolog");
+
+ si_shader_dump_disassembly(&shader->binary, debug, "main");
+
+ if (shader->epilog)
+ si_shader_dump_disassembly(&shader->epilog->binary,
+ debug, "epilog");
+ fprintf(stderr, "\n");
+ }
si_shader_dump_stats(sscreen, &shader->config,
shader->selector ? shader->selector->info.num_inputs : 0,
- shader->binary.code_size, debug, processor);
+ si_get_shader_binary_size(shader), debug, processor);
}
int si_compile_llvm(struct si_screen *sscreen,
@@ -4177,6 +4628,19 @@ int si_compile_llvm(struct si_screen *sscreen,
FREE(binary->global_symbol_offsets);
binary->config = NULL;
binary->global_symbol_offsets = NULL;
+
+ /* Some shaders can't have rodata because their binaries can be
+ * concatenated.
+ */
+ if (binary->rodata_size &&
+ (processor == TGSI_PROCESSOR_VERTEX ||
+ processor == TGSI_PROCESSOR_TESS_CTRL ||
+ processor == TGSI_PROCESSOR_TESS_EVAL ||
+ processor == TGSI_PROCESSOR_FRAGMENT)) {
+ fprintf(stderr, "radeonsi: The shader can't have rodata.");
+ return -EINVAL;
+ }
+
return r;
}
@@ -4196,7 +4660,7 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen,
outputs = MALLOC(gsinfo->num_outputs * sizeof(outputs[0]));
- si_init_shader_ctx(ctx, sscreen, ctx->shader, ctx->tm, gsinfo);
+ si_init_shader_ctx(ctx, sscreen, ctx->shader, ctx->tm);
ctx->type = TGSI_PROCESSOR_VERTEX;
ctx->is_gs_copy_shader = true;
@@ -4241,7 +4705,7 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen,
si_llvm_export_vs(bld_base, outputs, gsinfo->num_outputs);
- LLVMBuildRetVoid(bld_base->base.gallivm->builder);
+ LLVMBuildRet(gallivm->builder, ctx->return_value);
/* Dump LLVM IR before any optimization passes */
if (sscreen->b.debug_flags & DBG_PREOPT_IR &&
@@ -4278,35 +4742,38 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f)
switch (shader) {
case PIPE_SHADER_VERTEX:
fprintf(f, " instance_divisors = {");
- for (i = 0; i < Elements(key->vs.instance_divisors); i++)
+ for (i = 0; i < Elements(key->vs.prolog.instance_divisors); i++)
fprintf(f, !i ? "%u" : ", %u",
- key->vs.instance_divisors[i]);
+ key->vs.prolog.instance_divisors[i]);
fprintf(f, "}\n");
fprintf(f, " as_es = %u\n", key->vs.as_es);
fprintf(f, " as_ls = %u\n", key->vs.as_ls);
- fprintf(f, " export_prim_id = %u\n", key->vs.export_prim_id);
+ fprintf(f, " export_prim_id = %u\n", key->vs.epilog.export_prim_id);
break;
case PIPE_SHADER_TESS_CTRL:
- fprintf(f, " prim_mode = %u\n", key->tcs.prim_mode);
+ fprintf(f, " prim_mode = %u\n", key->tcs.epilog.prim_mode);
break;
case PIPE_SHADER_TESS_EVAL:
fprintf(f, " as_es = %u\n", key->tes.as_es);
- fprintf(f, " export_prim_id = %u\n", key->tes.export_prim_id);
+ fprintf(f, " export_prim_id = %u\n", key->tes.epilog.export_prim_id);
break;
case PIPE_SHADER_GEOMETRY:
break;
case PIPE_SHADER_FRAGMENT:
- fprintf(f, " spi_shader_col_format = 0x%x\n", key->ps.spi_shader_col_format);
- fprintf(f, " last_cbuf = %u\n", key->ps.last_cbuf);
- fprintf(f, " color_two_side = %u\n", key->ps.color_two_side);
- fprintf(f, " alpha_func = %u\n", key->ps.alpha_func);
- fprintf(f, " alpha_to_one = %u\n", key->ps.alpha_to_one);
- fprintf(f, " poly_stipple = %u\n", key->ps.poly_stipple);
- fprintf(f, " clamp_color = %u\n", key->ps.clamp_color);
+ fprintf(f, " prolog.color_two_side = %u\n", key->ps.prolog.color_two_side);
+ fprintf(f, " prolog.poly_stipple = %u\n", key->ps.prolog.poly_stipple);
+ fprintf(f, " prolog.force_persample_interp = %u\n", key->ps.prolog.force_persample_interp);
+ fprintf(f, " epilog.spi_shader_col_format = 0x%x\n", key->ps.epilog.spi_shader_col_format);
+ fprintf(f, " epilog.color_is_int8 = 0x%X\n", key->ps.epilog.color_is_int8);
+ fprintf(f, " epilog.last_cbuf = %u\n", key->ps.epilog.last_cbuf);
+ fprintf(f, " epilog.alpha_func = %u\n", key->ps.epilog.alpha_func);
+ fprintf(f, " epilog.alpha_to_one = %u\n", key->ps.epilog.alpha_to_one);
+ fprintf(f, " epilog.poly_line_smoothing = %u\n", key->ps.epilog.poly_line_smoothing);
+ fprintf(f, " epilog.clamp_color = %u\n", key->ps.epilog.clamp_color);
break;
default:
@@ -4317,13 +4784,12 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f)
static void si_init_shader_ctx(struct si_shader_context *ctx,
struct si_screen *sscreen,
struct si_shader *shader,
- LLVMTargetMachineRef tm,
- struct tgsi_shader_info *info)
+ LLVMTargetMachineRef tm)
{
struct lp_build_tgsi_context *bld_base;
memset(ctx, 0, sizeof(*ctx));
- radeon_llvm_context_init(&ctx->radeon_bld);
+ radeon_llvm_context_init(&ctx->radeon_bld, "amdgcn--");
ctx->tm = tm;
ctx->screen = sscreen;
if (shader && shader->selector)
@@ -4336,15 +4802,18 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
ctx->i1 = LLVMInt1TypeInContext(ctx->radeon_bld.gallivm.context);
ctx->i8 = LLVMInt8TypeInContext(ctx->radeon_bld.gallivm.context);
ctx->i32 = LLVMInt32TypeInContext(ctx->radeon_bld.gallivm.context);
- ctx->i128 = LLVMInt128TypeInContext(ctx->radeon_bld.gallivm.context);
+ ctx->i64 = LLVMInt64TypeInContext(ctx->radeon_bld.gallivm.context);
+ ctx->i128 = LLVMIntTypeInContext(ctx->radeon_bld.gallivm.context, 128);
ctx->f32 = LLVMFloatTypeInContext(ctx->radeon_bld.gallivm.context);
ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
+ ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
bld_base = &ctx->radeon_bld.soa.bld_base;
- bld_base->info = info;
+ if (shader && shader->selector)
+ bld_base->info = &shader->selector->info;
bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
bld_base->op_actions[TGSI_OPCODE_INTERP_CENTROID] = interp_action;
@@ -4380,40 +4849,31 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32";
}
-int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
- struct si_shader *shader,
- struct pipe_debug_callback *debug)
+int si_compile_tgsi_shader(struct si_screen *sscreen,
+ LLVMTargetMachineRef tm,
+ struct si_shader *shader,
+ bool is_monolithic,
+ struct pipe_debug_callback *debug)
{
struct si_shader_selector *sel = shader->selector;
- struct tgsi_token *tokens = sel->tokens;
struct si_shader_context ctx;
struct lp_build_tgsi_context *bld_base;
- struct tgsi_shader_info stipple_shader_info;
LLVMModuleRef mod;
int r = 0;
- bool poly_stipple = sel->type == PIPE_SHADER_FRAGMENT &&
- shader->key.ps.poly_stipple;
-
- if (poly_stipple) {
- tokens = util_pstipple_create_fragment_shader(tokens, NULL,
- SI_POLY_STIPPLE_SAMPLER,
- TGSI_FILE_SYSTEM_VALUE);
- tgsi_scan_shader(tokens, &stipple_shader_info);
- }
/* Dump TGSI code before doing TGSI->LLVM conversion in case the
* conversion fails. */
if (r600_can_dump_shader(&sscreen->b, sel->info.processor) &&
!(sscreen->b.debug_flags & DBG_NO_TGSI)) {
si_dump_shader_key(sel->type, &shader->key, stderr);
- tgsi_dump(tokens, 0);
+ tgsi_dump(sel->tokens, 0);
si_dump_streamout(&sel->so);
}
- si_init_shader_ctx(&ctx, sscreen, shader, tm,
- poly_stipple ? &stipple_shader_info : &sel->info);
+ si_init_shader_ctx(&ctx, sscreen, shader, tm);
+ ctx.is_monolithic = is_monolithic;
- shader->uses_instanceid = sel->info.uses_instanceid;
+ shader->info.uses_instanceid = sel->info.uses_instanceid;
bld_base = &ctx.radeon_bld.soa.bld_base;
ctx.radeon_bld.load_system_value = declare_system_value;
@@ -4447,7 +4907,10 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
break;
case TGSI_PROCESSOR_FRAGMENT:
ctx.radeon_bld.load_input = declare_input_fs;
- bld_base->emit_epilogue = si_llvm_emit_fs_epilogue;
+ if (is_monolithic)
+ bld_base->emit_epilogue = si_llvm_emit_fs_epilogue;
+ else
+ bld_base->emit_epilogue = si_llvm_return_fs_outputs;
break;
default:
assert(!"Unsupported shader type");
@@ -4461,6 +4924,14 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
preload_streamout_buffers(&ctx);
preload_ring_buffers(&ctx);
+ if (ctx.is_monolithic && sel->type == PIPE_SHADER_FRAGMENT &&
+ shader->key.ps.prolog.poly_stipple) {
+ LLVMValueRef views = LLVMGetParam(ctx.radeon_bld.main_fn,
+ SI_PARAM_SAMPLERS);
+ si_llvm_emit_polygon_stipple(&ctx, views,
+ SI_PARAM_POS_FIXED_PT);
+ }
+
if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
int i;
for (i = 0; i < 4; i++) {
@@ -4470,12 +4941,12 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
}
}
- if (!lp_build_tgsi_llvm(bld_base, tokens)) {
+ if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) {
fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
goto out;
}
- LLVMBuildRetVoid(bld_base->base.gallivm->builder);
+ LLVMBuildRet(bld_base->base.gallivm->builder, ctx.return_value);
mod = bld_base->base.gallivm->module;
/* Dump LLVM IR before any optimization passes */
@@ -4492,16 +4963,49 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
goto out;
}
- si_shader_dump(sscreen, shader, debug, ctx.type);
+ radeon_llvm_dispose(&ctx.radeon_bld);
- r = si_shader_binary_upload(sscreen, shader);
- if (r) {
- fprintf(stderr, "LLVM failed to upload shader\n");
- goto out;
+ /* Calculate the number of fragment input VGPRs. */
+ if (ctx.type == TGSI_PROCESSOR_FRAGMENT) {
+ shader->info.num_input_vgprs = 0;
+ shader->info.face_vgpr_index = -1;
+
+ if (G_0286CC_PERSP_SAMPLE_ENA(shader->config.spi_ps_input_addr))
+ shader->info.num_input_vgprs += 2;
+ if (G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_addr))
+ shader->info.num_input_vgprs += 2;
+ if (G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_addr))
+ shader->info.num_input_vgprs += 2;
+ if (G_0286CC_PERSP_PULL_MODEL_ENA(shader->config.spi_ps_input_addr))
+ shader->info.num_input_vgprs += 3;
+ if (G_0286CC_LINEAR_SAMPLE_ENA(shader->config.spi_ps_input_addr))
+ shader->info.num_input_vgprs += 2;
+ if (G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_addr))
+ shader->info.num_input_vgprs += 2;
+ if (G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_addr))
+ shader->info.num_input_vgprs += 2;
+ if (G_0286CC_LINE_STIPPLE_TEX_ENA(shader->config.spi_ps_input_addr))
+ shader->info.num_input_vgprs += 1;
+ if (G_0286CC_POS_X_FLOAT_ENA(shader->config.spi_ps_input_addr))
+ shader->info.num_input_vgprs += 1;
+ if (G_0286CC_POS_Y_FLOAT_ENA(shader->config.spi_ps_input_addr))
+ shader->info.num_input_vgprs += 1;
+ if (G_0286CC_POS_Z_FLOAT_ENA(shader->config.spi_ps_input_addr))
+ shader->info.num_input_vgprs += 1;
+ if (G_0286CC_POS_W_FLOAT_ENA(shader->config.spi_ps_input_addr))
+ shader->info.num_input_vgprs += 1;
+ if (G_0286CC_FRONT_FACE_ENA(shader->config.spi_ps_input_addr)) {
+ shader->info.face_vgpr_index = shader->info.num_input_vgprs;
+ shader->info.num_input_vgprs += 1;
+ }
+ if (G_0286CC_ANCILLARY_ENA(shader->config.spi_ps_input_addr))
+ shader->info.num_input_vgprs += 1;
+ if (G_0286CC_SAMPLE_COVERAGE_ENA(shader->config.spi_ps_input_addr))
+ shader->info.num_input_vgprs += 1;
+ if (G_0286CC_POS_FIXED_PT_ENA(shader->config.spi_ps_input_addr))
+ shader->info.num_input_vgprs += 1;
}
- radeon_llvm_dispose(&ctx.radeon_bld);
-
if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
shader->gs_copy_shader = CALLOC_STRUCT(si_shader);
shader->gs_copy_shader->selector = shader->selector;
@@ -4517,11 +5021,968 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
out:
for (int i = 0; i < SI_NUM_CONST_BUFFERS; i++)
FREE(ctx.constants[i]);
- if (poly_stipple)
- tgsi_free_tokens(tokens);
return r;
}
+/**
+ * Create, compile and return a shader part (prolog or epilog).
+ *
+ * The list is searched for a part whose key is bytewise equal to \p key;
+ * if none exists, a new part is allocated, compiled and pushed onto the
+ * front of the list. The whole operation runs with
+ * sscreen->shader_parts_mutex held.
+ *
+ * \param sscreen	screen
+ * \param list		list of shader parts of the same category
+ * \param key		shader part key
+ * \param tm		LLVM target machine
+ * \param debug	debug callback
+ * \param compile	the callback responsible for compilation
+ * \return		non-NULL on success, NULL if the allocation or the
+ *			compilation failed
+ */
+static struct si_shader_part *
+si_get_shader_part(struct si_screen *sscreen,
+		   struct si_shader_part **list,
+		   union si_shader_part_key *key,
+		   LLVMTargetMachineRef tm,
+		   struct pipe_debug_callback *debug,
+		   bool (*compile)(struct si_screen *,
+				   LLVMTargetMachineRef,
+				   struct pipe_debug_callback *,
+				   struct si_shader_part *))
+{
+	struct si_shader_part *result;
+
+	pipe_mutex_lock(sscreen->shader_parts_mutex);
+
+	/* Find existing. */
+	for (result = *list; result; result = result->next) {
+		if (memcmp(&result->key, key, sizeof(*key)) == 0)
+			goto out;
+	}
+
+	/* Compile a new one. */
+	result = CALLOC_STRUCT(si_shader_part);
+	if (!result)
+		goto out; /* out of memory: report failure, don't crash */
+
+	result->key = *key;
+	if (!compile(sscreen, tm, debug, result)) {
+		FREE(result);
+		result = NULL;
+		goto out;
+	}
+
+	result->next = *list;
+	*list = result;
+
+out:
+	pipe_mutex_unlock(sscreen->shader_parts_mutex);
+	return result;
+}
+
+/**
+ * Create a vertex shader prolog.
+ *
+ * The inputs are the same as VS (a lot of SGPRs and 4 VGPR system values).
+ * All inputs are returned unmodified. The vertex load indices are
+ * stored after them, which will be used by the API VS for fetching inputs.
+ *
+ * For example, the expected outputs for instance_divisors[] = {0, 1, 2} are:
+ * input_v0,
+ * input_v1,
+ * input_v2,
+ * input_v3,
+ * (VertexID + BaseVertex),
+ * (InstanceID + StartInstance),
+ * (InstanceID / 2 + StartInstance)
+ *
+ * \param sscreen screen
+ * \param tm LLVM target machine
+ * \param debug debug callback
+ * \param out shader part; out->key is read, out->binary and out->config
+ * are passed to si_compile_llvm to be filled in
+ * \return true on success, false if si_compile_llvm returned non-zero
+ */
+static bool si_compile_vs_prolog(struct si_screen *sscreen,
+ LLVMTargetMachineRef tm,
+ struct pipe_debug_callback *debug,
+ struct si_shader_part *out)
+{
+ union si_shader_part_key *key = &out->key;
+ struct si_shader shader = {};
+ struct si_shader_context ctx;
+ struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm;
+ LLVMTypeRef *params, *returns;
+ LLVMValueRef ret, func;
+ int last_sgpr, num_params, num_returns, i;
+ bool status = true;
+
+ si_init_shader_ctx(&ctx, sscreen, &shader, tm);
+ ctx.type = TGSI_PROCESSOR_VERTEX;
+ /* The VGPR parameters follow the input SGPRs: the vertex ID is taken
+ * from the first of the 4 VGPRs and the instance ID from the fourth. */
+ ctx.param_vertex_id = key->vs_prolog.num_input_sgprs;
+ ctx.param_instance_id = key->vs_prolog.num_input_sgprs + 3;
+
+ /* 4 preloaded VGPRs + vertex load indices as prolog outputs */
+ params = alloca((key->vs_prolog.num_input_sgprs + 4) *
+ sizeof(LLVMTypeRef));
+ returns = alloca((key->vs_prolog.num_input_sgprs + 4 +
+ key->vs_prolog.last_input + 1) *
+ sizeof(LLVMTypeRef));
+ num_params = 0;
+ num_returns = 0;
+
+ /* Declare input and output SGPRs. */
+ /* (num_params was already cleared above; this reset is redundant.) */
+ num_params = 0;
+ for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) {
+ params[num_params++] = ctx.i32;
+ returns[num_returns++] = ctx.i32;
+ }
+ last_sgpr = num_params - 1;
+
+ /* 4 preloaded VGPRs (outputs must be floats) */
+ for (i = 0; i < 4; i++) {
+ params[num_params++] = ctx.i32;
+ returns[num_returns++] = ctx.f32;
+ }
+
+ /* Vertex load indices. */
+ for (i = 0; i <= key->vs_prolog.last_input; i++)
+ returns[num_returns++] = ctx.f32;
+
+ /* Create the function. */
+ si_create_function(&ctx, returns, num_returns, params,
+ num_params, -1, last_sgpr);
+ func = ctx.radeon_bld.main_fn;
+
+ /* Copy inputs to outputs. This should be no-op, as the registers match,
+ * but it will prevent the compiler from overwriting them unintentionally.
+ */
+ ret = ctx.return_value;
+ for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) {
+ LLVMValueRef p = LLVMGetParam(func, i);
+ ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
+ }
+ /* The 4 VGPRs are declared as i32 parameters but returned as f32,
+ * so each one is bitcast before being inserted at the same index. */
+ for (i = num_params - 4; i < num_params; i++) {
+ LLVMValueRef p = LLVMGetParam(func, i);
+ p = LLVMBuildBitCast(gallivm->builder, p, ctx.f32, "");
+ ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
+ }
+
+ /* Compute vertex load indices from instance divisors. */
+ for (i = 0; i <= key->vs_prolog.last_input; i++) {
+ unsigned divisor = key->vs_prolog.states.instance_divisors[i];
+ LLVMValueRef index;
+
+ if (divisor) {
+ /* InstanceID / Divisor + StartInstance */
+ index = get_instance_index_for_fetch(&ctx.radeon_bld,
+ SI_SGPR_START_INSTANCE,
+ divisor);
+ } else {
+ /* VertexID + BaseVertex */
+ index = LLVMBuildAdd(gallivm->builder,
+ LLVMGetParam(func, ctx.param_vertex_id),
+ LLVMGetParam(func, SI_SGPR_BASE_VERTEX), "");
+ }
+
+ /* From here on num_params is reused as the index of the next
+ * return slot, which starts right after the passed-through inputs. */
+ index = LLVMBuildBitCast(gallivm->builder, index, ctx.f32, "");
+ ret = LLVMBuildInsertValue(gallivm->builder, ret, index,
+ num_params++, "");
+ }
+
+ /* Compile. */
+ LLVMBuildRet(gallivm->builder, ret);
+ radeon_llvm_finalize_module(&ctx.radeon_bld);
+
+ /* A non-zero result from si_compile_llvm is treated as failure. */
+ if (si_compile_llvm(sscreen, &out->binary, &out->config, tm,
+ gallivm->module, debug, ctx.type,
+ "Vertex Shader Prolog"))
+ status = false;
+
+ radeon_llvm_dispose(&ctx.radeon_bld);
+ return status;
+}
+
+/**
+ * Compile the vertex shader epilog. This is also used by the tessellation
+ * evaluation shader compiled as VS.
+ *
+ * The input is PrimitiveID.
+ *
+ * If PrimitiveID is required by the pixel shader, export it.
+ * Otherwise, do nothing.
+ *
+ * \param sscreen screen
+ * \param tm LLVM target machine
+ * \param debug debug callback
+ * \param out shader part; out->key is read, out->binary and out->config
+ * are passed to si_compile_llvm to be filled in
+ * \return true on success, false if si_compile_llvm returned non-zero
+ */
+static bool si_compile_vs_epilog(struct si_screen *sscreen,
+ LLVMTargetMachineRef tm,
+ struct pipe_debug_callback *debug,
+ struct si_shader_part *out)
+{
+ union si_shader_part_key *key = &out->key;
+ struct si_shader_context ctx;
+ struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm;
+ struct lp_build_tgsi_context *bld_base = &ctx.radeon_bld.soa.bld_base;
+ LLVMTypeRef params[5];
+ int num_params, i;
+ bool status = true;
+
+ /* No si_shader is associated with this context (NULL is passed). */
+ si_init_shader_ctx(&ctx, sscreen, NULL, tm);
+ ctx.type = TGSI_PROCESSOR_VERTEX;
+
+ /* Declare input VGPRs. */
+ /* Enough parameters are declared for PrimitiveID to be readable at
+ * index VS_EPILOG_PRIMID_LOC; without the export there are none. */
+ num_params = key->vs_epilog.states.export_prim_id ?
+ (VS_EPILOG_PRIMID_LOC + 1) : 0;
+ assert(num_params <= ARRAY_SIZE(params));
+
+ for (i = 0; i < num_params; i++)
+ params[i] = ctx.f32;
+
+ /* Create the function. */
+ si_create_function(&ctx, NULL, 0, params, num_params,
+ -1, -1);
+
+ /* Emit exports. */
+ if (key->vs_epilog.states.export_prim_id) {
+ struct lp_build_context *base = &bld_base->base;
+ struct lp_build_context *uint = &bld_base->uint_bld;
+ LLVMValueRef args[9];
+
+ args[0] = lp_build_const_int32(base->gallivm, 0x0); /* enabled channels */
+ args[1] = uint->zero; /* whether the EXEC mask is valid */
+ args[2] = uint->zero; /* DONE bit */
+ /* Export target: parameter slot prim_id_param_offset from the key. */
+ args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_PARAM +
+ key->vs_epilog.prim_id_param_offset);
+ args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */
+ args[5] = LLVMGetParam(ctx.radeon_bld.main_fn,
+ VS_EPILOG_PRIMID_LOC); /* X */
+ args[6] = uint->undef; /* Y */
+ args[7] = uint->undef; /* Z */
+ args[8] = uint->undef; /* W */
+
+ lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
+ LLVMVoidTypeInContext(base->gallivm->context),
+ args, 9, 0);
+ }
+
+ /* Compile. */
+ LLVMBuildRet(gallivm->builder, ctx.return_value);
+ radeon_llvm_finalize_module(&ctx.radeon_bld);
+
+ /* A non-zero result from si_compile_llvm is treated as failure. */
+ if (si_compile_llvm(sscreen, &out->binary, &out->config, tm,
+ gallivm->module, debug, ctx.type,
+ "Vertex Shader Epilog"))
+ status = false;
+
+ radeon_llvm_dispose(&ctx.radeon_bld);
+ return status;
+}
+
+/**
+ * Create & compile a vertex shader epilog. This is a helper used by VS and TES.
+ *
+ * \param sscreen	screen
+ * \param tm		LLVM target machine
+ * \param shader	shader that receives the part in shader->epilog
+ * \param debug	debug callback
+ * \param states	epilog state bits taken from the shader key (the VS
+ *			or the TES member, depending on the caller)
+ * \return		true if shader->epilog was set to a non-NULL part
+ */
+static bool si_get_vs_epilog(struct si_screen *sscreen,
+			     LLVMTargetMachineRef tm,
+			     struct si_shader *shader,
+			     struct pipe_debug_callback *debug,
+			     struct si_vs_epilog_bits *states)
+{
+	union si_shader_part_key epilog_key;
+
+	/* Zero the whole key first: part keys are compared with memcmp. */
+	memset(&epilog_key, 0, sizeof(epilog_key));
+	epilog_key.vs_epilog.states = *states;
+
+	/* Set up the PrimitiveID output.
+	 *
+	 * Test the bits handed in by the caller instead of key.vs.epilog:
+	 * TES callers pass key.tes.epilog, and reading the "vs" member of
+	 * the key union for a TES is only correct if both members happen to
+	 * place their epilog bits at the same offset.
+	 */
+	if (states->export_prim_id) {
+		unsigned index = shader->selector->info.num_outputs;
+		unsigned offset = shader->info.nr_param_exports++;
+
+		epilog_key.vs_epilog.prim_id_param_offset = offset;
+		assert(index < ARRAY_SIZE(shader->info.vs_output_param_offset));
+		shader->info.vs_output_param_offset[index] = offset;
+	}
+
+	shader->epilog = si_get_shader_part(sscreen, &sscreen->vs_epilogs,
+					    &epilog_key, tm, debug,
+					    si_compile_vs_epilog);
+	return shader->epilog != NULL;
+}
+
+/**
+ * Select and compile (or reuse) vertex shader parts (prolog & epilog).
+ *
+ * \return false if a required part could not be obtained, true otherwise
+ */
+static bool si_shader_select_vs_parts(struct si_screen *sscreen,
+				      LLVMTargetMachineRef tm,
+				      struct si_shader *shader,
+				      struct pipe_debug_callback *debug)
+{
+	struct tgsi_shader_info *info = &shader->selector->info;
+	union si_shader_part_key prolog_key;
+	bool needs_epilog = !shader->key.vs.as_es && !shader->key.vs.as_ls;
+	unsigned input;
+
+	/* Build the prolog key from a fully zeroed union. */
+	memset(&prolog_key, 0, sizeof(prolog_key));
+	prolog_key.vs_prolog.states = shader->key.vs.prolog;
+	prolog_key.vs_prolog.num_input_sgprs = shader->info.num_input_sgprs;
+	prolog_key.vs_prolog.last_input = MAX2(1, info->num_inputs) - 1;
+
+	/* Without inputs the prolog would be a no-op, so none is attached. */
+	if (info->num_inputs) {
+		shader->prolog = si_get_shader_part(sscreen,
+						    &sscreen->vs_prologs,
+						    &prolog_key, tm, debug,
+						    si_compile_vs_prolog);
+		if (shader->prolog == NULL)
+			return false;
+	}
+
+	/* Only the variant that is neither ES nor LS gets an epilog. */
+	if (needs_epilog &&
+	    !si_get_vs_epilog(sscreen, tm, shader, debug,
+			      &shader->key.vs.epilog))
+		return false;
+
+	/* Any non-zero instance divisor sets the instanceID flag. */
+	for (input = 0; input < info->num_inputs; input++) {
+		if (prolog_key.vs_prolog.states.instance_divisors[input]) {
+			shader->info.uses_instanceid = true;
+			break;
+		}
+	}
+
+	return true;
+}
+
+/**
+ * Select and compile (or reuse) TES parts (epilog).
+ *
+ * A TES compiled as ES needs no part; a TES compiled as VS gets the vertex
+ * shader epilog.
+ */
+static bool si_shader_select_tes_parts(struct si_screen *sscreen,
+				       LLVMTargetMachineRef tm,
+				       struct si_shader *shader,
+				       struct pipe_debug_callback *debug)
+{
+	return shader->key.tes.as_es ||
+	       si_get_vs_epilog(sscreen, tm, shader, debug,
+				&shader->key.tes.epilog);
+}
+
+/**
+ * Compile the TCS epilog. This writes tessellation factors to memory based on
+ * the output primitive type of the tessellator (determined by TES).
+ *
+ * \param sscreen screen
+ * \param tm LLVM target machine
+ * \param debug debug callback
+ * \param out shader part; out->key is read, out->binary and out->config
+ * are passed to si_compile_llvm to be filled in
+ * \return true on success, false if si_compile_llvm returned non-zero
+ */
+static bool si_compile_tcs_epilog(struct si_screen *sscreen,
+ LLVMTargetMachineRef tm,
+ struct pipe_debug_callback *debug,
+ struct si_shader_part *out)
+{
+ union si_shader_part_key *key = &out->key;
+ struct si_shader shader = {};
+ struct si_shader_context ctx;
+ struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm;
+ struct lp_build_tgsi_context *bld_base = &ctx.radeon_bld.soa.bld_base;
+ LLVMTypeRef params[16];
+ LLVMValueRef func;
+ int last_array_pointer, last_sgpr, num_params;
+ bool status = true;
+
+ /* A temporary si_shader carries the epilog states from the part key. */
+ si_init_shader_ctx(&ctx, sscreen, &shader, tm);
+ ctx.type = TGSI_PROCESSOR_TESS_CTRL;
+ shader.key.tcs.epilog = key->tcs_epilog.states;
+
+ /* Declare inputs. Only RW_BUFFERS and TESS_FACTOR_OFFSET are used. */
+ params[SI_PARAM_RW_BUFFERS] = const_array(ctx.v16i8, SI_NUM_RW_BUFFERS);
+ last_array_pointer = SI_PARAM_RW_BUFFERS;
+ params[SI_PARAM_CONST_BUFFERS] = ctx.i64;
+ params[SI_PARAM_SAMPLERS] = ctx.i64;
+ params[SI_PARAM_UNUSED] = ctx.i64;
+ params[SI_PARAM_TCS_OUT_OFFSETS] = ctx.i32;
+ params[SI_PARAM_TCS_OUT_LAYOUT] = ctx.i32;
+ params[SI_PARAM_TCS_IN_LAYOUT] = ctx.i32;
+ params[SI_PARAM_TESS_FACTOR_OFFSET] = ctx.i32;
+ last_sgpr = SI_PARAM_TESS_FACTOR_OFFSET;
+ num_params = last_sgpr + 1;
+
+ params[num_params++] = ctx.i32; /* patch index within the wave (REL_PATCH_ID) */
+ params[num_params++] = ctx.i32; /* invocation ID within the patch */
+ params[num_params++] = ctx.i32; /* LDS offset where tess factors should be loaded from */
+
+ /* Create the function. */
+ si_create_function(&ctx, NULL, 0, params, num_params,
+ last_array_pointer, last_sgpr);
+ declare_tess_lds(&ctx);
+ func = ctx.radeon_bld.main_fn;
+
+ /* The three parameters declared after last_sgpr are forwarded in
+ * declaration order. */
+ si_write_tess_factors(bld_base,
+ LLVMGetParam(func, last_sgpr + 1),
+ LLVMGetParam(func, last_sgpr + 2),
+ LLVMGetParam(func, last_sgpr + 3));
+
+ /* Compile. */
+ LLVMBuildRet(gallivm->builder, ctx.return_value);
+ radeon_llvm_finalize_module(&ctx.radeon_bld);
+
+ /* A non-zero result from si_compile_llvm is treated as failure. */
+ if (si_compile_llvm(sscreen, &out->binary, &out->config, tm,
+ gallivm->module, debug, ctx.type,
+ "Tessellation Control Shader Epilog"))
+ status = false;
+
+ radeon_llvm_dispose(&ctx.radeon_bld);
+ return status;
+}
+
+/**
+ * Select and compile (or reuse) TCS parts (epilog).
+ *
+ * \return true if shader->epilog was set to a non-NULL part
+ */
+static bool si_shader_select_tcs_parts(struct si_screen *sscreen,
+				       LLVMTargetMachineRef tm,
+				       struct si_shader *shader,
+				       struct pipe_debug_callback *debug)
+{
+	union si_shader_part_key epilog_key;
+	struct si_shader_part *part;
+
+	/* The key is built from a fully zeroed union plus the TCS epilog
+	 * states of the shader key. */
+	memset(&epilog_key, 0, sizeof(epilog_key));
+	epilog_key.tcs_epilog.states = shader->key.tcs.epilog;
+
+	part = si_get_shader_part(sscreen, &sscreen->tcs_epilogs,
+				  &epilog_key, tm, debug,
+				  si_compile_tcs_epilog);
+	shader->epilog = part;
+
+	return part != NULL;
+}
+
+/**
+ * Compile the pixel shader prolog. This handles:
+ * - two-side color selection and interpolation
+ * - overriding interpolation parameters for the API PS
+ * - polygon stippling
+ *
+ * All preloaded SGPRs and VGPRs are passed through unmodified unless they are
+ * overridden by other states. (e.g. per-sample interpolation)
+ * Interpolated colors are stored after the preloaded VGPRs.
+ *
+ * \param sscreen screen
+ * \param tm LLVM target machine
+ * \param debug debug callback
+ * \param out shader part; out->key is read, out->binary and out->config
+ * are passed to si_compile_llvm to be filled in
+ * \return true on success, false if si_compile_llvm returned non-zero
+ */
+static bool si_compile_ps_prolog(struct si_screen *sscreen,
+ LLVMTargetMachineRef tm,
+ struct pipe_debug_callback *debug,
+ struct si_shader_part *out)
+{
+ union si_shader_part_key *key = &out->key;
+ struct si_shader shader = {};
+ struct si_shader_context ctx;
+ struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm;
+ LLVMTypeRef *params;
+ LLVMValueRef ret, func;
+ int last_sgpr, num_params, num_returns, i, num_color_channels;
+ bool status = true;
+
+ /* A temporary si_shader carries the prolog states from the part key. */
+ si_init_shader_ctx(&ctx, sscreen, &shader, tm);
+ ctx.type = TGSI_PROCESSOR_FRAGMENT;
+ shader.key.ps.prolog = key->ps_prolog.states;
+
+ /* Number of inputs + 8 color elements. */
+ params = alloca((key->ps_prolog.num_input_sgprs +
+ key->ps_prolog.num_input_vgprs + 8) *
+ sizeof(LLVMTypeRef));
+
+ /* Declare inputs. */
+ num_params = 0;
+ for (i = 0; i < key->ps_prolog.num_input_sgprs; i++)
+ params[num_params++] = ctx.i32;
+ last_sgpr = num_params - 1;
+
+ for (i = 0; i < key->ps_prolog.num_input_vgprs; i++)
+ params[num_params++] = ctx.f32;
+
+ /* Declare outputs (same as inputs + add colors if needed) */
+ /* The params array doubles as the return-type list: the first
+ * num_params entries are shared and one f32 per color channel read
+ * is appended for the returns only. */
+ num_returns = num_params;
+ num_color_channels = util_bitcount(key->ps_prolog.colors_read);
+ for (i = 0; i < num_color_channels; i++)
+ params[num_returns++] = ctx.f32;
+
+ /* Create the function. */
+ si_create_function(&ctx, params, num_returns, params,
+ num_params, -1, last_sgpr);
+ func = ctx.radeon_bld.main_fn;
+
+ /* Copy inputs to outputs. This should be no-op, as the registers match,
+ * but it will prevent the compiler from overwriting them unintentionally.
+ */
+ ret = ctx.return_value;
+ for (i = 0; i < num_params; i++) {
+ LLVMValueRef p = LLVMGetParam(func, i);
+ ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
+ }
+
+ /* Polygon stippling. */
+ if (key->ps_prolog.states.poly_stipple) {
+ /* POS_FIXED_PT is always last. */
+ unsigned pos = key->ps_prolog.num_input_sgprs +
+ key->ps_prolog.num_input_vgprs - 1;
+ LLVMValueRef ptr[2], views;
+
+ /* Get the pointer to sampler views. */
+ /* Two consecutive 32-bit SGPRs are gathered, reinterpreted as one
+ * i64 and converted to a pointer to the sampler-view array. */
+ ptr[0] = LLVMGetParam(func, SI_SGPR_SAMPLERS);
+ ptr[1] = LLVMGetParam(func, SI_SGPR_SAMPLERS+1);
+ views = lp_build_gather_values(gallivm, ptr, 2);
+ views = LLVMBuildBitCast(gallivm->builder, views, ctx.i64, "");
+ views = LLVMBuildIntToPtr(gallivm->builder, views,
+ const_array(ctx.v8i32, SI_NUM_SAMPLERS), "");
+
+ si_llvm_emit_polygon_stipple(&ctx, views, pos);
+ }
+
+ /* Interpolate colors. */
+ /* colors_read holds a 4-bit channel mask per color (2 colors). */
+ for (i = 0; i < 2; i++) {
+ unsigned writemask = (key->ps_prolog.colors_read >> (i * 4)) & 0xf;
+ unsigned face_vgpr = key->ps_prolog.num_input_sgprs +
+ key->ps_prolog.face_vgpr_index;
+ LLVMValueRef interp[2], color[4];
+ LLVMValueRef interp_ij = NULL, prim_mask = NULL, face = NULL;
+
+ if (!writemask)
+ continue;
+
+ /* If the interpolation qualifier is not CONSTANT (-1). */
+ if (key->ps_prolog.color_interp_vgpr_index[i] != -1) {
+ unsigned interp_vgpr = key->ps_prolog.num_input_sgprs +
+ key->ps_prolog.color_interp_vgpr_index[i];
+
+ interp[0] = LLVMGetParam(func, interp_vgpr);
+ interp[1] = LLVMGetParam(func, interp_vgpr + 1);
+ interp_ij = lp_build_gather_values(gallivm, interp, 2);
+ interp_ij = LLVMBuildBitCast(gallivm->builder, interp_ij,
+ ctx.v2i32, "");
+ }
+
+ /* Use the absolute location of the input. */
+ prim_mask = LLVMGetParam(func, SI_PS_NUM_USER_SGPR);
+
+ /* face_vgpr is only read when two-sided color is enabled. */
+ if (key->ps_prolog.states.color_two_side) {
+ face = LLVMGetParam(func, face_vgpr);
+ face = LLVMBuildBitCast(gallivm->builder, face, ctx.i32, "");
+ }
+
+ interp_fs_input(&ctx,
+ key->ps_prolog.color_attr_index[i],
+ TGSI_SEMANTIC_COLOR, i,
+ key->ps_prolog.num_interp_inputs,
+ key->ps_prolog.colors_read, interp_ij,
+ prim_mask, face, color);
+
+ /* Append each channel that is read to the return value; from
+ * here on num_params is reused as the next free return slot. */
+ while (writemask) {
+ unsigned chan = u_bit_scan(&writemask);
+ ret = LLVMBuildInsertValue(gallivm->builder, ret, color[chan],
+ num_params++, "");
+ }
+ }
+
+ /* Force per-sample interpolation. */
+ /* NOTE(review): the fixed offsets below (0, 2, 4, 6, 8, 10 from the
+ * first VGPR) presumably rely on all six barycentric pairs being
+ * enabled with no other input in between - confirm against the code
+ * that sets force_persample_interp. */
+ if (key->ps_prolog.states.force_persample_interp) {
+ /* This 'i' shadows the function-level 'i'. */
+ unsigned i, base = key->ps_prolog.num_input_sgprs;
+ LLVMValueRef persp_sample[2], linear_sample[2];
+
+ /* Read PERSP_SAMPLE. */
+ for (i = 0; i < 2; i++)
+ persp_sample[i] = LLVMGetParam(func, base + i);
+ /* Overwrite PERSP_CENTER. */
+ for (i = 0; i < 2; i++)
+ ret = LLVMBuildInsertValue(gallivm->builder, ret,
+ persp_sample[i], base + 2 + i, "");
+ /* Overwrite PERSP_CENTROID. */
+ for (i = 0; i < 2; i++)
+ ret = LLVMBuildInsertValue(gallivm->builder, ret,
+ persp_sample[i], base + 4 + i, "");
+ /* Read LINEAR_SAMPLE. */
+ for (i = 0; i < 2; i++)
+ linear_sample[i] = LLVMGetParam(func, base + 6 + i);
+ /* Overwrite LINEAR_CENTER. */
+ for (i = 0; i < 2; i++)
+ ret = LLVMBuildInsertValue(gallivm->builder, ret,
+ linear_sample[i], base + 8 + i, "");
+ /* Overwrite LINEAR_CENTROID. */
+ for (i = 0; i < 2; i++)
+ ret = LLVMBuildInsertValue(gallivm->builder, ret,
+ linear_sample[i], base + 10 + i, "");
+ }
+
+ /* Compile. */
+ LLVMBuildRet(gallivm->builder, ret);
+ radeon_llvm_finalize_module(&ctx.radeon_bld);
+
+ /* A non-zero result from si_compile_llvm is treated as failure. */
+ if (si_compile_llvm(sscreen, &out->binary, &out->config, tm,
+ gallivm->module, debug, ctx.type,
+ "Fragment Shader Prolog"))
+ status = false;
+
+ radeon_llvm_dispose(&ctx.radeon_bld);
+ return status;
+}
+
+/**
+ * Compile the pixel shader epilog. This handles everything that must be
+ * emulated for pixel shader exports. (alpha-test, format conversions, etc)
+ *
+ * The variant to build is described by out->key.ps_epilog. The generated
+ * function takes the five inputs declared as SGPRs below, followed by f32
+ * inputs in this order: four per bit set in colors_written, then one each
+ * for depth, stencil and sample mask when the matching writes_* bit is set.
+ *
+ * The compiled code and its config are stored in out->binary and
+ * out->config. Returns true on success and false when si_compile_llvm
+ * reports a non-zero result.
+ */
+static bool si_compile_ps_epilog(struct si_screen *sscreen,
+ LLVMTargetMachineRef tm,
+ struct pipe_debug_callback *debug,
+ struct si_shader_part *out)
+{
+ union si_shader_part_key *key = &out->key;
+ struct si_shader shader = {}; /* temporary shader that only carries the epilog key bits */
+ struct si_shader_context ctx;
+ struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm;
+ struct lp_build_tgsi_context *bld_base = &ctx.radeon_bld.soa.bld_base;
+ LLVMTypeRef params[16+8*4+3]; /* presumably SGPR slots + 8 colors * 4 channels + depth/stencil/samplemask; bounded by the assert below */
+ LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
+ int last_array_pointer, last_sgpr, num_params, i;
+ bool status = true;
+
+ si_init_shader_ctx(&ctx, sscreen, &shader, tm);
+ ctx.type = TGSI_PROCESSOR_FRAGMENT;
+ shader.key.ps.epilog = key->ps_epilog.states;
+
+ /* Declare input SGPRs. */
+ params[SI_PARAM_RW_BUFFERS] = ctx.i64;
+ params[SI_PARAM_CONST_BUFFERS] = ctx.i64;
+ params[SI_PARAM_SAMPLERS] = ctx.i64;
+ params[SI_PARAM_UNUSED] = ctx.i64;
+ params[SI_PARAM_ALPHA_REF] = ctx.f32;
+ last_array_pointer = -1;
+ last_sgpr = SI_PARAM_ALPHA_REF;
+
+ /* Declare input VGPRs. */
+ num_params = (last_sgpr + 1) +
+ util_bitcount(key->ps_epilog.colors_written) * 4 +
+ key->ps_epilog.writes_z +
+ key->ps_epilog.writes_stencil +
+ key->ps_epilog.writes_samplemask;
+
+ num_params = MAX2(num_params,
+ last_sgpr + 1 + PS_EPILOG_SAMPLEMASK_MIN_LOC + 1); /* never declare fewer VGPRs than PS_EPILOG_SAMPLEMASK_MIN_LOC + 1 */
+
+ assert(num_params <= ARRAY_SIZE(params));
+
+ for (i = last_sgpr + 1; i < num_params; i++)
+ params[i] = ctx.f32;
+
+ /* Create the function. */
+ si_create_function(&ctx, NULL, 0, params, num_params,
+ last_array_pointer, last_sgpr);
+ /* Disable elimination of unused inputs. */
+ radeon_llvm_add_attribute(ctx.radeon_bld.main_fn,
+ "InitialPSInputAddr", 0xffffff);
+
+ /* Process colors. */
+ unsigned vgpr = last_sgpr + 1; /* index of the next VGPR parameter to consume */
+ unsigned colors_written = key->ps_epilog.colors_written;
+ int last_color_export = -1; /* stays -1 when depth/stencil/samplemask is written or no color qualifies */
+
+ /* Find the last color export. */
+ if (!key->ps_epilog.writes_z &&
+ !key->ps_epilog.writes_stencil &&
+ !key->ps_epilog.writes_samplemask) {
+ unsigned spi_format = key->ps_epilog.states.spi_shader_col_format; /* 4 format bits per color buffer */
+
+ /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
+ if (colors_written == 0x1 && key->ps_epilog.states.last_cbuf > 0) {
+ /* Just set this if any of the colorbuffers are enabled. */
+ if (spi_format &
+ ((1llu << (4 * (key->ps_epilog.states.last_cbuf + 1))) - 1))
+ last_color_export = 0;
+ } else {
+ for (i = 0; i < 8; i++)
+ if (colors_written & (1 << i) &&
+ (spi_format >> (i * 4)) & 0xf)
+ last_color_export = i;
+ }
+ }
+
+ while (colors_written) {
+ LLVMValueRef color[4];
+ int mrt = u_bit_scan(&colors_written);
+
+ for (i = 0; i < 4; i++)
+ color[i] = LLVMGetParam(ctx.radeon_bld.main_fn, vgpr++);
+
+ si_export_mrt_color(bld_base, color, mrt,
+ num_params - 1, /* index of the last function parameter */
+ mrt == last_color_export); /* true only for the export selected above */
+ }
+
+ /* Process depth, stencil, samplemask. */
+ if (key->ps_epilog.writes_z)
+ depth = LLVMGetParam(ctx.radeon_bld.main_fn, vgpr++);
+ if (key->ps_epilog.writes_stencil)
+ stencil = LLVMGetParam(ctx.radeon_bld.main_fn, vgpr++);
+ if (key->ps_epilog.writes_samplemask)
+ samplemask = LLVMGetParam(ctx.radeon_bld.main_fn, vgpr++);
+
+ if (depth || stencil || samplemask)
+ si_export_mrt_z(bld_base, depth, stencil, samplemask);
+ else if (last_color_export == -1)
+ si_export_null(bld_base); /* neither a Z export nor a final color export was emitted */
+
+ /* Compile. */
+ LLVMBuildRetVoid(gallivm->builder);
+ radeon_llvm_finalize_module(&ctx.radeon_bld);
+
+ if (si_compile_llvm(sscreen, &out->binary, &out->config, tm,
+ gallivm->module, debug, ctx.type,
+ "Fragment Shader Epilog"))
+ status = false;
+
+ radeon_llvm_dispose(&ctx.radeon_bld);
+ return status;
+}
+
+/**
+ * Select and compile (or reuse) pixel shader parts (prolog & epilog).
+ *
+ * Builds the prolog and epilog keys from the shader key, the TGSI info and
+ * the register counts in shader->info, obtains the matching parts through
+ * si_get_shader_part, and then fixes up shader->config.spi_ps_input_ena.
+ * The prolog is only requested when it has work to do; the epilog is
+ * always requested.
+ *
+ * Returns false if a required part could not be obtained, true otherwise.
+ */
+static bool si_shader_select_ps_parts(struct si_screen *sscreen,
+ LLVMTargetMachineRef tm,
+ struct si_shader *shader,
+ struct pipe_debug_callback *debug)
+{
+ struct tgsi_shader_info *info = &shader->selector->info;
+ union si_shader_part_key prolog_key;
+ union si_shader_part_key epilog_key;
+ unsigned i;
+
+ /* Get the prolog. */
+ memset(&prolog_key, 0, sizeof(prolog_key));
+ prolog_key.ps_prolog.states = shader->key.ps.prolog;
+ prolog_key.ps_prolog.colors_read = info->colors_read;
+ prolog_key.ps_prolog.num_input_sgprs = shader->info.num_input_sgprs;
+ prolog_key.ps_prolog.num_input_vgprs = shader->info.num_input_vgprs;
+
+ if (info->colors_read) {
+ unsigned *color = shader->selector->color_attr_index;
+
+ if (shader->key.ps.prolog.color_two_side) {
+ /* BCOLORs are stored after the last input. */
+ prolog_key.ps_prolog.num_interp_inputs = info->num_inputs;
+ prolog_key.ps_prolog.face_vgpr_index = shader->info.face_vgpr_index;
+ shader->config.spi_ps_input_ena |= S_0286CC_FRONT_FACE_ENA(1); /* the prolog reads the face VGPR for two-side selection */
+ }
+
+ for (i = 0; i < 2; i++) {
+ unsigned location = info->input_interpolate_loc[color[i]];
+
+ if (!(info->colors_read & (0xf << i*4))) /* 4 read-mask bits per color; skip colors that are not read */
+ continue;
+
+ prolog_key.ps_prolog.color_attr_index[i] = color[i];
+
+ /* Force per-sample interpolation for the colors here. */
+ if (shader->key.ps.prolog.force_persample_interp)
+ location = TGSI_INTERPOLATE_LOC_SAMPLE;
+
+ switch (info->input_interpolate[color[i]]) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ prolog_key.ps_prolog.color_interp_vgpr_index[i] = -1; /* -1 == constant, no weights are enabled */
+ break;
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ case TGSI_INTERPOLATE_COLOR:
+ switch (location) {
+ case TGSI_INTERPOLATE_LOC_SAMPLE:
+ prolog_key.ps_prolog.color_interp_vgpr_index[i] = 0; /* 0/2/4 = PERSP sample/center/centroid weight pair */
+ shader->config.spi_ps_input_ena |=
+ S_0286CC_PERSP_SAMPLE_ENA(1);
+ break;
+ case TGSI_INTERPOLATE_LOC_CENTER:
+ prolog_key.ps_prolog.color_interp_vgpr_index[i] = 2;
+ shader->config.spi_ps_input_ena |=
+ S_0286CC_PERSP_CENTER_ENA(1);
+ break;
+ case TGSI_INTERPOLATE_LOC_CENTROID:
+ prolog_key.ps_prolog.color_interp_vgpr_index[i] = 4;
+ shader->config.spi_ps_input_ena |=
+ S_0286CC_PERSP_CENTROID_ENA(1);
+ break;
+ default:
+ assert(0);
+ }
+ break;
+ case TGSI_INTERPOLATE_LINEAR:
+ switch (location) {
+ case TGSI_INTERPOLATE_LOC_SAMPLE:
+ prolog_key.ps_prolog.color_interp_vgpr_index[i] = 6; /* 6/8/10 = LINEAR sample/center/centroid weight pair */
+ shader->config.spi_ps_input_ena |=
+ S_0286CC_LINEAR_SAMPLE_ENA(1);
+ break;
+ case TGSI_INTERPOLATE_LOC_CENTER:
+ prolog_key.ps_prolog.color_interp_vgpr_index[i] = 8;
+ shader->config.spi_ps_input_ena |=
+ S_0286CC_LINEAR_CENTER_ENA(1);
+ break;
+ case TGSI_INTERPOLATE_LOC_CENTROID:
+ prolog_key.ps_prolog.color_interp_vgpr_index[i] = 10;
+ shader->config.spi_ps_input_ena |=
+ S_0286CC_LINEAR_CENTROID_ENA(1);
+ break;
+ default:
+ assert(0);
+ }
+ break;
+ default:
+ assert(0);
+ }
+ }
+ }
+
+ /* The prolog is a no-op if these aren't set. */
+ if (prolog_key.ps_prolog.colors_read ||
+ prolog_key.ps_prolog.states.force_persample_interp ||
+ prolog_key.ps_prolog.states.poly_stipple) {
+ shader->prolog =
+ si_get_shader_part(sscreen, &sscreen->ps_prologs,
+ &prolog_key, tm, debug,
+ si_compile_ps_prolog);
+ if (!shader->prolog)
+ return false;
+ }
+
+ /* Get the epilog. */
+ memset(&epilog_key, 0, sizeof(epilog_key));
+ epilog_key.ps_epilog.colors_written = info->colors_written;
+ epilog_key.ps_epilog.writes_z = info->writes_z;
+ epilog_key.ps_epilog.writes_stencil = info->writes_stencil;
+ epilog_key.ps_epilog.writes_samplemask = info->writes_samplemask;
+ epilog_key.ps_epilog.states = shader->key.ps.epilog;
+
+ shader->epilog =
+ si_get_shader_part(sscreen, &sscreen->ps_epilogs,
+ &epilog_key, tm, debug,
+ si_compile_ps_epilog);
+ if (!shader->epilog)
+ return false;
+
+ /* Enable POS_FIXED_PT if polygon stippling is enabled. */
+ if (shader->key.ps.prolog.poly_stipple) {
+ shader->config.spi_ps_input_ena |= S_0286CC_POS_FIXED_PT_ENA(1);
+ assert(G_0286CC_POS_FIXED_PT_ENA(shader->config.spi_ps_input_addr)); /* the input must already be part of spi_ps_input_addr */
+ }
+
+ /* Set up the enable bits for per-sample shading if needed. */
+ if (shader->key.ps.prolog.force_persample_interp) {
+ if (G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_ena) ||
+ G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena)) {
+ shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTER_ENA;
+ shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTROID_ENA;
+ shader->config.spi_ps_input_ena |= S_0286CC_PERSP_SAMPLE_ENA(1);
+ }
+ if (G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_ena) ||
+ G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena)) {
+ shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTER_ENA;
+ shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
+ shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_SAMPLE_ENA(1);
+ }
+ }
+
+ /* POS_W_FLOAT requires that one of the perspective weights is enabled. */
+ if (G_0286CC_POS_W_FLOAT_ENA(shader->config.spi_ps_input_ena) &&
+ !(shader->config.spi_ps_input_ena & 0xf)) {
+ shader->config.spi_ps_input_ena |= S_0286CC_PERSP_CENTER_ENA(1);
+ assert(G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_addr));
+ }
+
+ /* At least one pair of interpolation weights must be enabled. */
+ if (!(shader->config.spi_ps_input_ena & 0x7f)) {
+ shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_CENTER_ENA(1);
+ assert(G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_addr));
+ }
+
+ /* The sample mask input is always enabled, because the API shader always
+ * passes it through to the epilog. Disable it here if it's unused.
+ */
+ if (!shader->key.ps.epilog.poly_line_smoothing &&
+ !shader->selector->info.reads_samplemask)
+ shader->config.spi_ps_input_ena &= C_0286CC_SAMPLE_COVERAGE_ENA;
+
+ return true;
+}
+
+/**
+ * Build the final hardware shader for "shader" and upload it.
+ *
+ * Either the whole shader is compiled as one monolithic unit, or the
+ * already compiled main part of the selector is reused and combined with
+ * a matching prolog and/or epilog.
+ *
+ * Returns 0 on success, the non-zero result of the compile or upload step
+ * when that step fails, or -1 when a prolog/epilog could not be selected.
+ */
+int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
+		     struct si_shader *shader,
+		     struct pipe_debug_callback *debug)
+{
+	struct si_shader *main_part = shader->selector->main_shader_part;
+	bool monolithic;
+	int ret;
+
+	/* LS and ES are always compiled on demand. */
+	if (!main_part)
+		monolithic = true;
+	else if (shader->selector->type == PIPE_SHADER_VERTEX)
+		monolithic = shader->key.vs.as_es || shader->key.vs.as_ls;
+	else if (shader->selector->type == PIPE_SHADER_TESS_EVAL)
+		monolithic = shader->key.tes.as_es;
+	else
+		monolithic = false;
+
+	if (monolithic) {
+		/* Monolithic shader (compiled as a whole, has many variants,
+		 * may take a long time to compile).
+		 */
+		ret = si_compile_tgsi_shader(sscreen, tm, shader, true, debug);
+		if (ret != 0)
+			return ret;
+	} else {
+		/* The shader is assembled from 2-3 parts:
+		 *
+		 * - the middle part is the user shader; it has a single
+		 *   variant, compiled when the shader selector was created
+		 * - the prolog part goes in front of it
+		 * - the epilog part goes behind it
+		 *
+		 * Prologs and epilogs come in many, but simple, variants.
+		 */
+		struct si_shader_part *parts[2];
+		unsigned p;
+
+		/* Take over the compiled TGSI shader data. */
+		shader->is_binary_shared = true;
+		shader->binary = main_part->binary;
+		shader->config = main_part->config;
+		shader->info.num_input_sgprs = main_part->info.num_input_sgprs;
+		shader->info.num_input_vgprs = main_part->info.num_input_vgprs;
+		shader->info.face_vgpr_index = main_part->info.face_vgpr_index;
+		memcpy(shader->info.vs_output_param_offset,
+		       main_part->info.vs_output_param_offset,
+		       sizeof(main_part->info.vs_output_param_offset));
+		shader->info.uses_instanceid = main_part->info.uses_instanceid;
+		shader->info.nr_pos_exports = main_part->info.nr_pos_exports;
+		shader->info.nr_param_exports = main_part->info.nr_param_exports;
+
+		/* Pick the prolog and/or epilog for this shader stage. */
+		if (shader->selector->type == PIPE_SHADER_VERTEX) {
+			if (!si_shader_select_vs_parts(sscreen, tm, shader, debug))
+				return -1;
+		} else if (shader->selector->type == PIPE_SHADER_TESS_CTRL) {
+			if (!si_shader_select_tcs_parts(sscreen, tm, shader, debug))
+				return -1;
+		} else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
+			if (!si_shader_select_tes_parts(sscreen, tm, shader, debug))
+				return -1;
+		} else if (shader->selector->type == PIPE_SHADER_FRAGMENT) {
+			if (!si_shader_select_ps_parts(sscreen, tm, shader, debug))
+				return -1;
+
+			/* The VGPR count must cover at least all allocated
+			 * inputs.
+			 */
+			shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
+							shader->info.num_input_vgprs);
+		}
+
+		/* Grow the SGPR and VGPR counts to what the parts need. */
+		parts[0] = shader->prolog;
+		parts[1] = shader->epilog;
+		for (p = 0; p < 2; p++) {
+			if (!parts[p])
+				continue;
+
+			shader->config.num_sgprs = MAX2(shader->config.num_sgprs,
+							parts[p]->config.num_sgprs);
+			shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
+							parts[p]->config.num_vgprs);
+		}
+	}
+
+	si_shader_dump(sscreen, shader, debug, shader->selector->info.processor);
+
+	/* Upload. */
+	ret = si_shader_binary_upload(sscreen, shader);
+	if (ret != 0) {
+		fprintf(stderr, "LLVM failed to upload shader\n");
+		return ret;
+	}
+
+	return 0;
+}
+
void si_shader_destroy(struct si_shader *shader)
{
if (shader->gs_copy_shader) {
@@ -4534,5 +5995,6 @@ void si_shader_destroy(struct si_shader *shader)
r600_resource_reference(&shader->bo, NULL);
- radeon_shader_binary_clean(&shader->binary);
+ if (!shader->is_binary_shared)
+ radeon_shader_binary_clean(&shader->binary);
}
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index dc75e0330e4..ff5c24d8918 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -75,6 +75,8 @@
struct radeon_shader_binary;
struct radeon_shader_reloc;
+#define SI_MAX_VS_OUTPUTS 40
+
#define SI_SGPR_RW_BUFFERS 0 /* rings (& stream-out, VS only) */
#define SI_SGPR_CONST_BUFFERS 2
#define SI_SGPR_SAMPLERS 4 /* images & sampler states interleaved */
@@ -169,7 +171,7 @@ struct radeon_shader_reloc;
#define SI_PARAM_SAMPLE_COVERAGE 20
#define SI_PARAM_POS_FIXED_PT 21
-#define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 1)
+#define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 9) /* +8 for COLOR[0..1] */
struct si_shader;
@@ -181,6 +183,11 @@ struct si_shader_selector {
struct si_shader *first_variant; /* immutable after the first variant */
struct si_shader *last_variant; /* mutable */
+ /* The compiled TGSI shader expecting a prolog and/or epilog (not
+ * uploaded to a buffer).
+ */
+ struct si_shader *main_shader_part;
+
struct tgsi_token *tokens;
struct pipe_stream_output_info so;
struct tgsi_shader_info info;
@@ -199,6 +206,7 @@ struct si_shader_selector {
unsigned max_gsvs_emit_size;
/* PS parameters. */
+ unsigned color_attr_index[2];
unsigned db_shader_control;
/* Set 0xf or 0x0 (4 bits) per each written output.
* ANDed with spi_shader_col_format.
@@ -221,37 +229,103 @@ struct si_shader_selector {
* With both: LS | HS | ES | GS | VS | PS
*/
+/* Common VS bits between the shader key and the prolog key. */
+struct si_vs_prolog_bits {
+ unsigned instance_divisors[SI_NUM_VERTEX_BUFFERS]; /* filled per vertex element from its instance_divisor */
+};
+
+/* Common VS bits between the shader key and the epilog key.
+ * The same struct is embedded in the TES part of the shader key.
+ */
+struct si_vs_epilog_bits {
+ unsigned export_prim_id:1; /* when PS needs it and GS is disabled */
+ /* TODO:
+ * - skip clipdist, culldist (including clipvertex code) exports based
+ * on which clip_plane_enable bits are set
+ * - skip layer, viewport, clipdist, and culldist parameter exports
+ * if PS doesn't read them
+ */
+};
+
+/* Common TCS bits between the shader key and the epilog key. */
+struct si_tcs_epilog_bits {
+ unsigned prim_mode:3; /* TGSI_PROPERTY_TES_PRIM_MODE of the bound TES */
+};
+
+/* Common PS bits between the shader key and the prolog key. */
+struct si_ps_prolog_bits {
+ unsigned color_two_side:1; /* the prolog reads the face input; BCOLORs follow the last input */
+ /* TODO: add a flatshade bit that skips interpolation for colors */
+ unsigned poly_stipple:1; /* enables the POS_FIXED_PT input */
+ unsigned force_persample_interp:1; /* CENTER and CENTROID weights are replaced by SAMPLE weights */
+ /* TODO:
+ * - add force_center_interp if MSAA is disabled and centroid or
+ * sample are present
+ * - add force_center_interp_bc_optimize to force center interpolation
+ * based on the bc_optimize SGPR bit if MSAA is enabled, centroid is
+ * present and sample isn't present.
+ */
+};
+
+/* Common PS bits between the shader key and the epilog key. */
+struct si_ps_epilog_bits {
+ unsigned spi_shader_col_format; /* 4 format bits per color buffer */
+ unsigned color_is_int8:8; /* presumably one bit per color buffer - confirm against the key setup */
+ unsigned last_cbuf:3; /* > 0 implies FS_COLOR0_WRITES_ALL_CBUFS */
+ unsigned alpha_func:3; /* compared against PIPE_FUNC_* values */
+ unsigned alpha_to_one:1;
+ unsigned poly_line_smoothing:1; /* keeps the sample coverage input enabled */
+ unsigned clamp_color:1;
+};
+
+union si_shader_part_key { /* key describing one prolog or epilog variant; one member per part type */
+ struct {
+ struct si_vs_prolog_bits states;
+ unsigned num_input_sgprs:5;
+ unsigned last_input:4;
+ } vs_prolog;
+ struct {
+ struct si_vs_epilog_bits states;
+ unsigned prim_id_param_offset:5;
+ } vs_epilog;
+ struct {
+ struct si_tcs_epilog_bits states;
+ } tcs_epilog;
+ struct {
+ struct si_ps_prolog_bits states;
+ unsigned num_input_sgprs:5; /* copied from si_shader_info */
+ unsigned num_input_vgprs:5; /* copied from si_shader_info */
+ /* Color interpolation and two-side color selection. */
+ unsigned colors_read:8; /* color input components read */
+ unsigned num_interp_inputs:5; /* BCOLOR is at this location */
+ unsigned face_vgpr_index:5; /* only filled in when color_two_side is set */
+ char color_attr_index[2]; /* per color, taken from the selector's color_attr_index */
+ char color_interp_vgpr_index[2]; /* -1 == constant */
+ } ps_prolog;
+ struct {
+ struct si_ps_epilog_bits states;
+ unsigned colors_written:8; /* one bit per color output; each takes 4 epilog VGPRs */
+ unsigned writes_z:1;
+ unsigned writes_stencil:1;
+ unsigned writes_samplemask:1;
+ } ps_epilog;
+};
+
union si_shader_key {
struct {
- unsigned spi_shader_col_format;
- unsigned color_is_int8:8;
- unsigned last_cbuf:3;
- unsigned color_two_side:1;
- unsigned alpha_func:3;
- unsigned alpha_to_one:1;
- unsigned poly_stipple:1;
- unsigned poly_line_smoothing:1;
- unsigned clamp_color:1;
- unsigned force_persample_interp:1;
+ struct si_ps_prolog_bits prolog;
+ struct si_ps_epilog_bits epilog;
} ps;
struct {
- unsigned instance_divisors[SI_NUM_VERTEX_BUFFERS];
- /* Mask of "get_unique_index" bits - which outputs are read
- * by the next stage (needed by ES).
- * This describes how outputs are laid out in memory. */
+ struct si_vs_prolog_bits prolog;
+ struct si_vs_epilog_bits epilog;
unsigned as_es:1; /* export shader */
unsigned as_ls:1; /* local shader */
- unsigned export_prim_id:1; /* when PS needs it and GS is disabled */
} vs;
struct {
- unsigned prim_mode:3;
+ struct si_tcs_epilog_bits epilog;
} tcs; /* tessellation control shader */
struct {
- /* Mask of "get_unique_index" bits - which outputs are read
- * by the next stage (needed by ES).
- * This describes how outputs are laid out in memory. */
+ struct si_vs_epilog_bits epilog; /* same as VS */
unsigned as_es:1; /* export shader */
- unsigned export_prim_id:1; /* when PS needs it and GS is disabled */
} tes; /* tessellation evaluation shader */
};
@@ -267,22 +341,42 @@ struct si_shader_config {
unsigned rsrc2;
};
+/* GCN-specific shader info.
+ * si_shader_create copies every field from the selector's main shader
+ * part, and si_get_shader_binary stores the whole struct in the cached
+ * binary.
+ */
+struct si_shader_info {
+ ubyte vs_output_param_offset[SI_MAX_VS_OUTPUTS];
+ ubyte num_input_sgprs; /* also part of the PS prolog key */
+ ubyte num_input_vgprs; /* lower bound for the PS VGPR count */
+ char face_vgpr_index; /* used by the PS prolog for two-side color selection */
+ bool uses_instanceid;
+ ubyte nr_pos_exports;
+ ubyte nr_param_exports;
+};
+
struct si_shader {
struct si_shader_selector *selector;
struct si_shader *next_variant;
+ struct si_shader_part *prolog;
+ struct si_shader_part *epilog;
+
struct si_shader *gs_copy_shader;
struct si_pm4_state *pm4;
struct r600_resource *bo;
struct r600_resource *scratch_bo;
union si_shader_key key;
+ bool is_binary_shared;
+
+ /* The following data is all that's needed for binary shaders. */
struct radeon_shader_binary binary;
struct si_shader_config config;
+ struct si_shader_info info;
+};
- unsigned vs_output_param_offset[PIPE_MAX_SHADER_OUTPUTS];
- bool uses_instanceid;
- unsigned nr_pos_exports;
- unsigned nr_param_exports;
+struct si_shader_part {
+ struct si_shader_part *next;
+ union si_shader_part_key key;
+ struct radeon_shader_binary binary;
+ struct si_shader_config config;
};
static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx)
@@ -310,14 +404,19 @@ static inline struct si_shader* si_get_vs_state(struct si_context *sctx)
static inline bool si_vs_exports_prim_id(struct si_shader *shader)
{
if (shader->selector->type == PIPE_SHADER_VERTEX)
- return shader->key.vs.export_prim_id;
+ return shader->key.vs.epilog.export_prim_id;
else if (shader->selector->type == PIPE_SHADER_TESS_EVAL)
- return shader->key.tes.export_prim_id;
+ return shader->key.tes.epilog.export_prim_id;
else
return false;
}
-/* radeonsi_shader.c */
+/* si_shader.c */
+int si_compile_tgsi_shader(struct si_screen *sscreen,
+ LLVMTargetMachineRef tm,
+ struct si_shader *shader,
+ bool is_monolithic,
+ struct pipe_debug_callback *debug);
int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
struct si_shader *shader,
struct pipe_debug_callback *debug);
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index bf780777b50..2dfdbeb8d8f 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -277,7 +277,7 @@ static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *a
if (sctx->b.family == CHIP_STONEY) {
unsigned spi_shader_col_format =
sctx->ps_shader.cso ?
- sctx->ps_shader.current->key.ps.spi_shader_col_format : 0;
+ sctx->ps_shader.current->key.ps.epilog.spi_shader_col_format : 0;
unsigned sx_ps_downconvert = 0;
unsigned sx_blend_opt_epsilon = 0;
unsigned sx_blend_opt_control = 0;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index f64c4d45f1b..40792cbc1d5 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -280,6 +280,8 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
/* si_state_shader.c */
bool si_update_shaders(struct si_context *sctx);
void si_init_shader_functions(struct si_context *sctx);
+bool si_init_shader_cache(struct si_screen *sscreen);
+void si_destroy_shader_cache(struct si_screen *sscreen);
/* si_state_draw.c */
void si_emit_cache_flush(struct si_context *sctx, struct r600_atom *atom);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 77a4e47c809..a6753a7a528 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -32,10 +32,221 @@
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_ureg.h"
+#include "util/hash_table.h"
+#include "util/u_hash.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
#include "util/u_simple_shaders.h"
+/* SHADER_CACHE */
+
+/**
+ * Pack the selector's TGSI tokens and its stream-output state into one
+ * newly allocated buffer. The buffer begins with its own total size in
+ * bytes, stored as a 32-bit integer, followed by the tokens and then
+ * sel->so. Returns NULL when the allocation fails.
+ */
+static void *si_get_tgsi_binary(struct si_shader_selector *sel)
+{
+	const unsigned header_bytes = 4;
+	unsigned tokens_bytes = tgsi_num_tokens(sel->tokens) *
+				sizeof(struct tgsi_token);
+	unsigned total_bytes = header_bytes + tokens_bytes + sizeof(sel->so);
+	char *buf = (char*)MALLOC(total_bytes);
+	char *cursor;
+
+	if (buf == NULL)
+		return NULL;
+
+	/* Size header first, then the two payloads back to back. */
+	*((uint32_t*)buf) = total_bytes;
+	cursor = buf + header_bytes;
+
+	memcpy(cursor, sel->tokens, tokens_bytes);
+	cursor += tokens_bytes;
+
+	memcpy(cursor, &sel->so, sizeof(sel->so));
+	return buf;
+}
+
+/** Copy "data" to "ptr" and return the next dword following copied data. */
+static uint32_t *write_data(uint32_t *ptr, const void *data, unsigned size)
+{
+	/* An empty payload may come with a NULL pointer, which memcpy must
+	 * not be given even for a length of zero.
+	 */
+	if (size)
+		memcpy(ptr, data, size);
+	ptr += DIV_ROUND_UP(size, 4);
+	return ptr;
+}
+
+/** Read data from "ptr". Return the next dword following the data. */
+static uint32_t *read_data(uint32_t *ptr, void *data, unsigned size)
+{
+	/* Same rule as in write_data: never hand NULL to memcpy. */
+	if (size)
+		memcpy(data, ptr, size);
+	ptr += DIV_ROUND_UP(size, 4);
+	return ptr;
+}
+
+/**
+ * Write the size as uint followed by the data. Return the next dword
+ * following the copied data.
+ */
+static uint32_t *write_chunk(uint32_t *ptr, const void *data, unsigned size)
+{
+	*ptr++ = size;
+	return write_data(ptr, data, size);
+}
+
+/**
+ * Read the size as uint followed by the data. Return both via parameters.
+ * Return the next dword following the data.
+ *
+ * "*data" must be NULL on entry. An empty chunk leaves it NULL. If the
+ * allocation fails, "*data" also stays NULL while "*size" is non-zero, and
+ * the payload is skipped so that the following chunks can still be located.
+ */
+static uint32_t *read_chunk(uint32_t *ptr, void **data, unsigned *size)
+{
+	*size = *ptr++;
+	assert(*data == NULL);
+
+	/* Nothing was stored: don't allocate a zero-sized buffer. */
+	if (!*size)
+		return ptr;
+
+	*data = malloc(*size);
+	if (!*data)
+		return ptr + DIV_ROUND_UP(*size, 4);
+
+	return read_data(ptr, *data, *size);
+}
+
+/**
+ * Return the shader binary in a buffer. The first 4 bytes contain its size
+ * as integer.
+ *
+ * Layout: total size, CRC32 of everything that follows, shader->config,
+ * shader->info, and then four size-prefixed chunks: code, rodata, relocs
+ * and the disassembly string (including its terminating zero). A missing
+ * disassembly string is stored as an empty chunk.
+ *
+ * Returns NULL if the buffer cannot be allocated.
+ */
+static void *si_get_shader_binary(struct si_shader *shader)
+{
+	/* There is always a size of data followed by the data itself. */
+	unsigned relocs_size = shader->binary.reloc_count *
+			       sizeof(shader->binary.relocs[0]);
+	/* The disassembly is optional, so don't call strlen on NULL. */
+	unsigned disasm_size = shader->binary.disasm_string ?
+			       strlen(shader->binary.disasm_string) + 1 : 0;
+	unsigned size =
+		4 + /* total size */
+		4 + /* CRC32 of the data below */
+		align(sizeof(shader->config), 4) +
+		align(sizeof(shader->info), 4) +
+		4 + align(shader->binary.code_size, 4) +
+		4 + align(shader->binary.rodata_size, 4) +
+		4 + align(relocs_size, 4) +
+		4 + align(disasm_size, 4);
+	void *buffer = CALLOC(1, size);
+	uint32_t *ptr = (uint32_t*)buffer;
+
+	if (!buffer)
+		return NULL;
+
+	*ptr++ = size;
+	ptr++; /* CRC32 is calculated at the end. */
+
+	ptr = write_data(ptr, &shader->config, sizeof(shader->config));
+	ptr = write_data(ptr, &shader->info, sizeof(shader->info));
+	ptr = write_chunk(ptr, shader->binary.code, shader->binary.code_size);
+	ptr = write_chunk(ptr, shader->binary.rodata, shader->binary.rodata_size);
+	ptr = write_chunk(ptr, shader->binary.relocs, relocs_size);
+	ptr = write_chunk(ptr, shader->binary.disasm_string, disasm_size);
+	assert((char *)ptr - (char *)buffer == size);
+
+	/* Compute CRC32. */
+	ptr = (uint32_t*)buffer;
+	ptr++;
+	*ptr = util_hash_crc32(ptr + 1, size - 8);
+
+	return buffer;
+}
+
+/**
+ * Fill "shader" from a buffer produced by si_get_shader_binary.
+ *
+ * Returns false if the CRC32 doesn't match or if memory for one of the
+ * chunks cannot be allocated. In the latter case all chunks allocated so
+ * far are released and the binary pointers are reset to NULL, but
+ * shader->config and shader->info have already been overwritten.
+ */
+static bool si_load_shader_binary(struct si_shader *shader, void *binary)
+{
+	uint32_t *ptr = (uint32_t*)binary;
+	uint32_t size = *ptr++;
+	uint32_t crc32 = *ptr++;
+	unsigned relocs_size, disasm_size;
+
+	if (util_hash_crc32(ptr, size - 8) != crc32) {
+		fprintf(stderr, "radeonsi: binary shader has invalid CRC32\n");
+		return false;
+	}
+
+	ptr = read_data(ptr, &shader->config, sizeof(shader->config));
+	ptr = read_data(ptr, &shader->info, sizeof(shader->info));
+	ptr = read_chunk(ptr, (void**)&shader->binary.code,
+			 &shader->binary.code_size);
+	ptr = read_chunk(ptr, (void**)&shader->binary.rodata,
+			 &shader->binary.rodata_size);
+	ptr = read_chunk(ptr, (void**)&shader->binary.relocs, &relocs_size);
+	shader->binary.reloc_count = relocs_size / sizeof(shader->binary.relocs[0]);
+	ptr = read_chunk(ptr, (void**)&shader->binary.disasm_string, &disasm_size);
+
+	/* read_chunk leaves the pointer NULL with a non-zero size when it
+	 * runs out of memory.
+	 */
+	if ((shader->binary.code_size && !shader->binary.code) ||
+	    (shader->binary.rodata_size && !shader->binary.rodata) ||
+	    (relocs_size && !shader->binary.relocs) ||
+	    (disasm_size && !shader->binary.disasm_string)) {
+		fprintf(stderr, "radeonsi: out of memory while loading a binary shader\n");
+
+		free((void*)shader->binary.code);
+		free((void*)shader->binary.rodata);
+		free((void*)shader->binary.relocs);
+		free((void*)shader->binary.disasm_string);
+		shader->binary.code = NULL;
+		shader->binary.code_size = 0;
+		shader->binary.rodata = NULL;
+		shader->binary.rodata_size = 0;
+		shader->binary.relocs = NULL;
+		shader->binary.reloc_count = 0;
+		shader->binary.disasm_string = NULL;
+		return false;
+	}
+
+	return true;
+}
+
+/**
+ * Add a shader to the cache, keyed by its TGSI binary. The shader is
+ * assumed not to be cached yet; check with si_shader_cache_load_shader
+ * first.
+ *
+ * Returns false on failure, in which case the tgsi_binary should be freed.
+ */
+static bool si_shader_cache_insert_shader(struct si_screen *sscreen,
+					  void *tgsi_binary,
+					  struct si_shader *shader)
+{
+	struct hash_entry *entry;
+	void *serialized = si_get_shader_binary(shader);
+
+	if (serialized == NULL)
+		return false;
+
+	entry = _mesa_hash_table_insert(sscreen->shader_cache, tgsi_binary,
+					serialized);
+	if (entry != NULL)
+		return true;
+
+	/* The table did not take the value, so release it here. */
+	FREE(serialized);
+	return false;
+}
+
+/**
+ * Look up tgsi_binary in the shader cache and, on a hit, fill "shader"
+ * from the cached binary. Returns false on a miss or if loading fails.
+ */
+static bool si_shader_cache_load_shader(struct si_screen *sscreen,
+					void *tgsi_binary,
+					struct si_shader *shader)
+{
+	struct hash_entry *hit =
+		_mesa_hash_table_search(sscreen->shader_cache, tgsi_binary);
+
+	return hit != NULL && si_load_shader_binary(shader, hit->data);
+}
+
+/** Hash a cache key: the CRC32 over the whole key, size header included. */
+static uint32_t si_shader_cache_key_hash(const void *key)
+{
+	/* A key starts with its own size in bytes. */
+	uint32_t key_size = *(uint32_t*)key;
+
+	return util_hash_crc32(key, key_size);
+}
+
+/** Two cache keys match when they have the same size and the same bytes. */
+static bool si_shader_cache_key_equals(const void *a, const void *b)
+{
+	uint32_t *lhs = (uint32_t*)a;
+	uint32_t *rhs = (uint32_t*)b;
+	/* A key starts with its own size in bytes. */
+	uint32_t lhs_size = lhs[0];
+
+	return lhs_size == rhs[0] && memcmp(lhs, rhs, lhs_size) == 0;
+}
+
+static void si_destroy_shader_cache_entry(struct hash_entry *entry) /* per-entry callback given to _mesa_hash_table_destroy */
+{
+ FREE((void*)entry->key); /* the key buffer that was inserted (the TGSI binary) */
+ FREE(entry->data); /* the serialized shader stored under that key */
+}
+
+/**
+ * Set up the screen's shader cache: its mutex and the hash table that
+ * maps TGSI binaries to shader binaries. Returns false if the hash table
+ * could not be created.
+ */
+bool si_init_shader_cache(struct si_screen *sscreen)
+{
+	struct hash_table *table;
+
+	pipe_mutex_init(sscreen->shader_cache_mutex);
+
+	table = _mesa_hash_table_create(NULL,
+					si_shader_cache_key_hash,
+					si_shader_cache_key_equals);
+	sscreen->shader_cache = table;
+	return table != NULL;
+}
+
+/**
+ * Release the shader cache: every entry's key and data, the hash table
+ * itself (if it was created), and finally the cache mutex.
+ */
+void si_destroy_shader_cache(struct si_screen *sscreen)
+{
+	struct hash_table *table = sscreen->shader_cache;
+
+	if (table != NULL) {
+		_mesa_hash_table_destroy(table,
+					 si_destroy_shader_cache_entry);
+	}
+
+	pipe_mutex_destroy(sscreen->shader_cache_mutex);
+}
+
+/* SHADER STATES */
+
static void si_set_tesseval_regs(struct si_shader *shader,
struct si_pm4_state *pm4)
{
@@ -108,7 +319,7 @@ static void si_shader_ls(struct si_shader *shader)
/* We need at least 2 components for LS.
* VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */
- vgpr_comp_cnt = shader->uses_instanceid ? 3 : 1;
+ vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1;
num_user_sgprs = SI_LS_NUM_USER_SGPR;
num_sgprs = shader->config.num_sgprs;
@@ -181,7 +392,7 @@ static void si_shader_es(struct si_shader *shader)
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
if (shader->selector->type == PIPE_SHADER_VERTEX) {
- vgpr_comp_cnt = shader->uses_instanceid ? 3 : 0;
+ vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 0;
num_user_sgprs = SI_ES_NUM_USER_SGPR;
} else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
vgpr_comp_cnt = 3; /* all components are needed for TES */
@@ -347,7 +558,7 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
vgpr_comp_cnt = 0; /* only VertexID is needed for GS-COPY. */
num_user_sgprs = SI_GSCOPY_NUM_USER_SGPR;
} else if (shader->selector->type == PIPE_SHADER_VERTEX) {
- vgpr_comp_cnt = shader->uses_instanceid ? 3 : (enable_prim_id ? 2 : 0);
+ vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : (enable_prim_id ? 2 : 0);
num_user_sgprs = SI_VS_NUM_USER_SGPR;
} else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
vgpr_comp_cnt = 3; /* all components are needed for TES */
@@ -363,19 +574,19 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
assert(num_sgprs <= 104);
/* VS is required to export at least one param. */
- nparams = MAX2(shader->nr_param_exports, 1);
+ nparams = MAX2(shader->info.nr_param_exports, 1);
si_pm4_set_reg(pm4, R_0286C4_SPI_VS_OUT_CONFIG,
S_0286C4_VS_EXPORT_COUNT(nparams - 1));
si_pm4_set_reg(pm4, R_02870C_SPI_SHADER_POS_FORMAT,
S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
- S_02870C_POS1_EXPORT_FORMAT(shader->nr_pos_exports > 1 ?
+ S_02870C_POS1_EXPORT_FORMAT(shader->info.nr_pos_exports > 1 ?
V_02870C_SPI_SHADER_4COMP :
V_02870C_SPI_SHADER_NONE) |
- S_02870C_POS2_EXPORT_FORMAT(shader->nr_pos_exports > 2 ?
+ S_02870C_POS2_EXPORT_FORMAT(shader->info.nr_pos_exports > 2 ?
V_02870C_SPI_SHADER_4COMP :
V_02870C_SPI_SHADER_NONE) |
- S_02870C_POS3_EXPORT_FORMAT(shader->nr_pos_exports > 3 ?
+ S_02870C_POS3_EXPORT_FORMAT(shader->info.nr_pos_exports > 3 ?
V_02870C_SPI_SHADER_4COMP :
V_02870C_SPI_SHADER_NONE));
@@ -415,7 +626,7 @@ static unsigned si_get_ps_num_interp(struct si_shader *ps)
unsigned num_colors = !!(info->colors_read & 0x0f) +
!!(info->colors_read & 0xf0);
unsigned num_interp = ps->selector->info.num_inputs +
- (ps->key.ps.color_two_side ? num_colors : 0);
+ (ps->key.ps.prolog.color_two_side ? num_colors : 0);
assert(num_interp <= 32);
return MIN2(num_interp, 32);
@@ -423,7 +634,7 @@ static unsigned si_get_ps_num_interp(struct si_shader *ps)
static unsigned si_get_spi_shader_col_format(struct si_shader *shader)
{
- unsigned value = shader->key.ps.spi_shader_col_format;
+ unsigned value = shader->key.ps.epilog.spi_shader_col_format;
unsigned i, num_targets = (util_last_bit(value) + 3) / 4;
/* If the i-th target format is set, all previous target formats must
@@ -528,7 +739,7 @@ static void si_shader_ps(struct si_shader *shader)
if (!spi_shader_col_format &&
!info->writes_z && !info->writes_stencil && !info->writes_samplemask &&
(shader->selector->info.uses_kill ||
- shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS))
+ shader->key.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS))
spi_shader_col_format = V_028714_SPI_SHADER_32_R;
si_pm4_set_reg(pm4, R_0286CC_SPI_PS_INPUT_ENA, input_ena);
@@ -638,11 +849,13 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
switch (sel->type) {
case PIPE_SHADER_VERTEX:
- if (sctx->vertex_elements)
- for (i = 0; i < sctx->vertex_elements->count; ++i)
- key->vs.instance_divisors[i] =
+ if (sctx->vertex_elements) {
+ unsigned count = MIN2(sel->info.num_inputs,
+ sctx->vertex_elements->count);
+ for (i = 0; i < count; ++i)
+ key->vs.prolog.instance_divisors[i] =
sctx->vertex_elements->elements[i].instance_divisor;
-
+ }
if (sctx->tes_shader.cso)
key->vs.as_ls = 1;
else if (sctx->gs_shader.cso)
@@ -650,17 +863,17 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
if (!sctx->gs_shader.cso && sctx->ps_shader.cso &&
sctx->ps_shader.cso->info.uses_primid)
- key->vs.export_prim_id = 1;
+ key->vs.epilog.export_prim_id = 1;
break;
case PIPE_SHADER_TESS_CTRL:
- key->tcs.prim_mode =
+ key->tcs.epilog.prim_mode =
sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
break;
case PIPE_SHADER_TESS_EVAL:
if (sctx->gs_shader.cso)
key->tes.as_es = 1;
else if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid)
- key->tes.export_prim_id = 1;
+ key->tes.epilog.export_prim_id = 1;
break;
case PIPE_SHADER_GEOMETRY:
break;
@@ -670,13 +883,13 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
if (sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
sel->info.colors_written == 0x1)
- key->ps.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1;
+ key->ps.epilog.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1;
if (blend) {
/* Select the shader color format based on whether
* blending or alpha are needed.
*/
- key->ps.spi_shader_col_format =
+ key->ps.epilog.spi_shader_col_format =
(blend->blend_enable_4bit & blend->need_src_alpha_4bit &
sctx->framebuffer.spi_shader_col_format_blend_alpha) |
(blend->blend_enable_4bit & ~blend->need_src_alpha_4bit &
@@ -686,26 +899,26 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
(~blend->blend_enable_4bit & ~blend->need_src_alpha_4bit &
sctx->framebuffer.spi_shader_col_format);
} else
- key->ps.spi_shader_col_format = sctx->framebuffer.spi_shader_col_format;
+ key->ps.epilog.spi_shader_col_format = sctx->framebuffer.spi_shader_col_format;
/* If alpha-to-coverage is enabled, we have to export alpha
* even if there is no color buffer.
*/
- if (!(key->ps.spi_shader_col_format & 0xf) &&
+ if (!(key->ps.epilog.spi_shader_col_format & 0xf) &&
blend && blend->alpha_to_coverage)
- key->ps.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR;
+ key->ps.epilog.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR;
/* On SI and CIK except Hawaii, the CB doesn't clamp outputs
* to the range supported by the type if a channel has less
* than 16 bits and the export format is 16_ABGR.
*/
if (sctx->b.chip_class <= CIK && sctx->b.family != CHIP_HAWAII)
- key->ps.color_is_int8 = sctx->framebuffer.color_is_int8;
+ key->ps.epilog.color_is_int8 = sctx->framebuffer.color_is_int8;
/* Disable unwritten outputs (if WRITE_ALL_CBUFS isn't enabled). */
- if (!key->ps.last_cbuf) {
- key->ps.spi_shader_col_format &= sel->colors_written_4bit;
- key->ps.color_is_int8 &= sel->info.colors_written;
+ if (!key->ps.epilog.last_cbuf) {
+ key->ps.epilog.spi_shader_col_format &= sel->colors_written_4bit;
+ key->ps.epilog.color_is_int8 &= sel->info.colors_written;
}
if (rs) {
@@ -714,31 +927,32 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES_ADJACENCY;
bool is_line = !is_poly && sctx->current_rast_prim != PIPE_PRIM_POINTS;
- key->ps.color_two_side = rs->two_side && sel->info.colors_read;
+ key->ps.prolog.color_two_side = rs->two_side && sel->info.colors_read;
if (sctx->queued.named.blend) {
- key->ps.alpha_to_one = sctx->queued.named.blend->alpha_to_one &&
- rs->multisample_enable &&
- !sctx->framebuffer.cb0_is_integer;
+ key->ps.epilog.alpha_to_one = sctx->queued.named.blend->alpha_to_one &&
+ rs->multisample_enable &&
+ !sctx->framebuffer.cb0_is_integer;
}
- key->ps.poly_stipple = rs->poly_stipple_enable && is_poly;
- key->ps.poly_line_smoothing = ((is_poly && rs->poly_smooth) ||
- (is_line && rs->line_smooth)) &&
- sctx->framebuffer.nr_samples <= 1;
- key->ps.clamp_color = rs->clamp_fragment_color;
-
- key->ps.force_persample_interp = rs->force_persample_interp &&
- rs->multisample_enable &&
- sctx->framebuffer.nr_samples > 1 &&
- sctx->ps_iter_samples > 1 &&
- (sel->info.uses_persp_center ||
- sel->info.uses_persp_centroid ||
- sel->info.uses_linear_center ||
- sel->info.uses_linear_centroid);
+ key->ps.prolog.poly_stipple = rs->poly_stipple_enable && is_poly;
+ key->ps.epilog.poly_line_smoothing = ((is_poly && rs->poly_smooth) ||
+ (is_line && rs->line_smooth)) &&
+ sctx->framebuffer.nr_samples <= 1;
+ key->ps.epilog.clamp_color = rs->clamp_fragment_color;
+
+ key->ps.prolog.force_persample_interp =
+ rs->force_persample_interp &&
+ rs->multisample_enable &&
+ sctx->framebuffer.nr_samples > 1 &&
+ sctx->ps_iter_samples > 1 &&
+ (sel->info.uses_persp_center ||
+ sel->info.uses_persp_centroid ||
+ sel->info.uses_linear_center ||
+ sel->info.uses_linear_centroid);
}
- key->ps.alpha_func = si_get_alpha_test_func(sctx);
+ key->ps.epilog.alpha_func = si_get_alpha_test_func(sctx);
break;
}
default:
@@ -821,6 +1035,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
const struct pipe_shader_state *state)
{
struct si_screen *sscreen = (struct si_screen *)ctx->screen;
+ struct si_context *sctx = (struct si_context*)ctx;
struct si_shader_selector *sel = CALLOC_STRUCT(si_shader_selector);
int i;
@@ -900,6 +1115,13 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
for (i = 0; i < 8; i++)
if (sel->info.colors_written & (1 << i))
sel->colors_written_4bit |= 0xf << (4 * i);
+
+ for (i = 0; i < sel->info.num_inputs; i++) {
+ if (sel->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR) {
+ int index = sel->info.input_semantic_index[i];
+ sel->color_attr_index[index] = i;
+ }
+ }
break;
}
@@ -921,6 +1143,44 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
break;
}
+ /* Compile the main shader part for use with a prolog and/or epilog. */
+ if (sel->type != PIPE_SHADER_GEOMETRY &&
+ !sscreen->use_monolithic_shaders) {
+ struct si_shader *shader = CALLOC_STRUCT(si_shader);
+ void *tgsi_binary;
+
+ if (!shader)
+ goto error;
+
+ shader->selector = sel;
+
+ tgsi_binary = si_get_tgsi_binary(sel);
+
+ /* Try to load the shader from the shader cache. */
+ pipe_mutex_lock(sscreen->shader_cache_mutex);
+
+ if (tgsi_binary &&
+ si_shader_cache_load_shader(sscreen, tgsi_binary, shader)) {
+ FREE(tgsi_binary);
+ } else {
+ /* Compile the shader if it hasn't been loaded from the cache. */
+ if (si_compile_tgsi_shader(sscreen, sctx->tm, shader, false,
+ &sctx->b.debug) != 0) {
+ FREE(shader);
+ FREE(tgsi_binary);
+ pipe_mutex_unlock(sscreen->shader_cache_mutex);
+ goto error;
+ }
+
+ if (tgsi_binary &&
+ !si_shader_cache_insert_shader(sscreen, tgsi_binary, shader))
+ FREE(tgsi_binary);
+ }
+ pipe_mutex_unlock(sscreen->shader_cache_mutex);
+
+ sel->main_shader_part = shader;
+ }
+
/* Pre-compilation. */
if (sel->type == PIPE_SHADER_GEOMETRY ||
sscreen->b.debug_flags & DBG_PRECOMPILE) {
@@ -934,27 +1194,29 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
*/
switch (sel->type) {
case PIPE_SHADER_TESS_CTRL:
- key.tcs.prim_mode = PIPE_PRIM_TRIANGLES;
+ key.tcs.epilog.prim_mode = PIPE_PRIM_TRIANGLES;
break;
case PIPE_SHADER_FRAGMENT:
- key.ps.alpha_func = PIPE_FUNC_ALWAYS;
+ key.ps.epilog.alpha_func = PIPE_FUNC_ALWAYS;
for (i = 0; i < 8; i++)
if (sel->info.colors_written & (1 << i))
- key.ps.spi_shader_col_format |=
+ key.ps.epilog.spi_shader_col_format |=
V_028710_SPI_SHADER_FP16_ABGR << (i * 4);
break;
}
- if (si_shader_select_with_key(ctx, &state, &key)) {
- fprintf(stderr, "radeonsi: can't create a shader\n");
- tgsi_free_tokens(sel->tokens);
- FREE(sel);
- return NULL;
- }
+ if (si_shader_select_with_key(ctx, &state, &key))
+ goto error;
}
pipe_mutex_init(sel->mutex);
return sel;
+
+error:
+ fprintf(stderr, "radeonsi: can't create a shader\n");
+ tgsi_free_tokens(sel->tokens);
+ FREE(sel);
+ return NULL;
}
/**
@@ -1119,6 +1381,9 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
p = c;
}
+ if (sel->main_shader_part)
+ si_delete_shader(sctx, sel->main_shader_part);
+
pipe_mutex_destroy(sel->mutex);
free(sel->tokens);
free(sel);
@@ -1144,14 +1409,14 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx,
for (j = 0; j < vsinfo->num_outputs; j++) {
if (name == vsinfo->output_semantic_name[j] &&
index == vsinfo->output_semantic_index[j]) {
- ps_input_cntl |= S_028644_OFFSET(vs->vs_output_param_offset[j]);
+ ps_input_cntl |= S_028644_OFFSET(vs->info.vs_output_param_offset[j]);
break;
}
}
if (name == TGSI_SEMANTIC_PRIMID)
/* PrimID is written after the last output. */
- ps_input_cntl |= S_028644_OFFSET(vs->vs_output_param_offset[vsinfo->num_outputs]);
+ ps_input_cntl |= S_028644_OFFSET(vs->info.vs_output_param_offset[vsinfo->num_outputs]);
else if (j == vsinfo->num_outputs && !G_028644_PT_SPRITE_TEX(ps_input_cntl)) {
/* No corresponding output found, load defaults into input.
* Don't set any other bits.
@@ -1191,7 +1456,7 @@ static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom)
}
}
- if (ps->key.ps.color_two_side) {
+ if (ps->key.ps.prolog.color_two_side) {
unsigned bcol = TGSI_SEMANTIC_BCOLOR;
for (i = 0; i < 2; i++) {
@@ -1745,8 +2010,8 @@ bool si_update_shaders(struct si_context *sctx)
si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
- if (sctx->smoothing_enabled != sctx->ps_shader.current->key.ps.poly_line_smoothing) {
- sctx->smoothing_enabled = sctx->ps_shader.current->key.ps.poly_line_smoothing;
+ if (sctx->smoothing_enabled != sctx->ps_shader.current->key.ps.epilog.poly_line_smoothing) {
+ sctx->smoothing_enabled = sctx->ps_shader.current->key.ps.epilog.poly_line_smoothing;
si_mark_atom_dirty(sctx, &sctx->msaa_config);
if (sctx->b.chip_class == SI)
diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c
index 80526ed4d15..fe6cf71a6e5 100644
--- a/src/gallium/drivers/svga/svga_draw.c
+++ b/src/gallium/drivers/svga/svga_draw.c
@@ -590,6 +590,16 @@ draw_vgpu10(struct svga_hwtnl *hwtnl,
}
else {
/* non-indexed drawing */
+ if (svga->state.hw_draw.ib_format != SVGA3D_FORMAT_INVALID) {
+ /* Unbind previously bound index buffer */
+ ret = SVGA3D_vgpu10_SetIndexBuffer(svga->swc, NULL,
+ SVGA3D_FORMAT_INVALID, 0);
+ if (ret != PIPE_OK)
+ return ret;
+ svga->state.hw_draw.ib_format = SVGA3D_FORMAT_INVALID;
+ svga->state.hw_draw.ib = NULL;
+ }
+
if (instance_count > 1) {
ret = SVGA3D_vgpu10_DrawInstanced(svga->swc,
vcount,
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index d5405f8eacf..c9abd49ec1e 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -468,12 +468,15 @@ vgpu9_get_shader_param(struct pipe_screen *screen, unsigned shader,
return 16;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ return 0;
case PIPE_SHADER_CAP_DOUBLES:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
@@ -527,12 +530,15 @@ vgpu9_get_shader_param(struct pipe_screen *screen, unsigned shader,
return 0;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ return 0;
case PIPE_SHADER_CAP_DOUBLES:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
@@ -619,12 +625,15 @@ vgpu10_get_shader_param(struct pipe_screen *screen, unsigned shader,
return SVGA3D_DX_MAX_SAMPLERS;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ return 0;
case PIPE_SHADER_CAP_DOUBLES:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
index 1223e446055..0c5afeb4cf9 100644
--- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
+++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
@@ -1782,7 +1782,7 @@ alloc_immediate_int4(struct svga_shader_emitter_v10 *emit,
static unsigned
alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index)
{
- const unsigned n = emit->info.num_inputs + index;
+ const unsigned n = emit->info.file_max[TGSI_FILE_INPUT] + 1 + index;
assert(index < Elements(emit->system_value_indexes));
emit->system_value_indexes[index] = n;
return n;
@@ -2446,7 +2446,7 @@ emit_input_declarations(struct svga_shader_emitter_v10 *emit)
else {
assert(emit->unit == PIPE_SHADER_VERTEX);
- for (i = 0; i < emit->info.num_inputs; i++) {
+ for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) {
unsigned usage_mask = emit->info.input_usage_mask[i];
unsigned index = i;
diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index 2ce2b3aef75..57f851833e5 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -50,7 +50,8 @@ struct trace_query
static inline struct trace_query *
-trace_query(struct pipe_query *query) {
+trace_query(struct pipe_query *query)
+{
return (struct trace_query *)query;
}
@@ -93,7 +94,7 @@ trace_surface_unwrap(struct trace_context *tr_ctx,
return NULL;
assert(surface->texture);
- if(!surface->texture)
+ if (!surface->texture)
return surface;
tr_surf = trace_surface(surface);
@@ -105,7 +106,7 @@ trace_surface_unwrap(struct trace_context *tr_ctx,
}
-static inline void
+static void
trace_context_draw_vbo(struct pipe_context *_pipe,
const struct pipe_draw_info *info)
{
@@ -135,7 +136,7 @@ trace_context_draw_vbo(struct pipe_context *_pipe,
}
-static inline struct pipe_query *
+static struct pipe_query *
trace_context_create_query(struct pipe_context *_pipe,
unsigned query_type,
unsigned index)
@@ -173,7 +174,7 @@ trace_context_create_query(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_destroy_query(struct pipe_context *_pipe,
struct pipe_query *_query)
{
@@ -195,7 +196,7 @@ trace_context_destroy_query(struct pipe_context *_pipe,
}
-static inline boolean
+static boolean
trace_context_begin_query(struct pipe_context *_pipe,
struct pipe_query *query)
{
@@ -217,7 +218,7 @@ trace_context_begin_query(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_end_query(struct pipe_context *_pipe,
struct pipe_query *query)
{
@@ -237,7 +238,7 @@ trace_context_end_query(struct pipe_context *_pipe,
}
-static inline boolean
+static boolean
trace_context_get_query_result(struct pipe_context *_pipe,
struct pipe_query *_query,
boolean wait,
@@ -272,7 +273,7 @@ trace_context_get_query_result(struct pipe_context *_pipe,
}
-static inline void *
+static void *
trace_context_create_blend_state(struct pipe_context *_pipe,
const struct pipe_blend_state *state)
{
@@ -295,7 +296,7 @@ trace_context_create_blend_state(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_bind_blend_state(struct pipe_context *_pipe,
void *state)
{
@@ -313,7 +314,7 @@ trace_context_bind_blend_state(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_delete_blend_state(struct pipe_context *_pipe,
void *state)
{
@@ -331,7 +332,7 @@ trace_context_delete_blend_state(struct pipe_context *_pipe,
}
-static inline void *
+static void *
trace_context_create_sampler_state(struct pipe_context *_pipe,
const struct pipe_sampler_state *state)
{
@@ -354,7 +355,7 @@ trace_context_create_sampler_state(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_bind_sampler_states(struct pipe_context *_pipe,
unsigned shader,
unsigned start,
@@ -381,7 +382,7 @@ trace_context_bind_sampler_states(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_delete_sampler_state(struct pipe_context *_pipe,
void *state)
{
@@ -399,7 +400,7 @@ trace_context_delete_sampler_state(struct pipe_context *_pipe,
}
-static inline void *
+static void *
trace_context_create_rasterizer_state(struct pipe_context *_pipe,
const struct pipe_rasterizer_state *state)
{
@@ -422,7 +423,7 @@ trace_context_create_rasterizer_state(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_bind_rasterizer_state(struct pipe_context *_pipe,
void *state)
{
@@ -440,7 +441,7 @@ trace_context_bind_rasterizer_state(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_delete_rasterizer_state(struct pipe_context *_pipe,
void *state)
{
@@ -458,7 +459,7 @@ trace_context_delete_rasterizer_state(struct pipe_context *_pipe,
}
-static inline void *
+static void *
trace_context_create_depth_stencil_alpha_state(struct pipe_context *_pipe,
const struct pipe_depth_stencil_alpha_state *state)
{
@@ -481,7 +482,7 @@ trace_context_create_depth_stencil_alpha_state(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_bind_depth_stencil_alpha_state(struct pipe_context *_pipe,
void *state)
{
@@ -499,7 +500,7 @@ trace_context_bind_depth_stencil_alpha_state(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe,
void *state)
{
@@ -518,7 +519,7 @@ trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe,
#define TRACE_SHADER_STATE(shader_type) \
- static inline void * \
+ static void * \
trace_context_create_##shader_type##_state(struct pipe_context *_pipe, \
const struct pipe_shader_state *state) \
{ \
@@ -534,7 +535,7 @@ trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe,
return result; \
} \
\
- static inline void \
+ static void \
trace_context_bind_##shader_type##_state(struct pipe_context *_pipe, \
void *state) \
{ \
@@ -547,7 +548,7 @@ trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe,
trace_dump_call_end(); \
} \
\
- static inline void \
+ static void \
trace_context_delete_##shader_type##_state(struct pipe_context *_pipe, \
void *state) \
{ \
@@ -570,6 +571,51 @@ TRACE_SHADER_STATE(tes)
static inline void *
+trace_context_create_compute_state(struct pipe_context *_pipe,
+ const struct pipe_compute_state *state)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+ void * result;
+
+ trace_dump_call_begin("pipe_context", "create_compute_state");
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(compute_state, state);
+ result = pipe->create_compute_state(pipe, state);
+ trace_dump_ret(ptr, result);
+ trace_dump_call_end();
+ return result;
+}
+
+static inline void
+trace_context_bind_compute_state(struct pipe_context *_pipe,
+ void *state)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "bind_compute_state");
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(ptr, state);
+ pipe->bind_compute_state(pipe, state);
+ trace_dump_call_end();
+}
+
+static inline void
+trace_context_delete_compute_state(struct pipe_context *_pipe,
+ void *state)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "delete_compute_state");
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(ptr, state);
+ pipe->delete_compute_state(pipe, state);
+ trace_dump_call_end();
+}
+
+static void *
trace_context_create_vertex_elements_state(struct pipe_context *_pipe,
unsigned num_elements,
const struct pipe_vertex_element *elements)
@@ -597,7 +643,7 @@ trace_context_create_vertex_elements_state(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_bind_vertex_elements_state(struct pipe_context *_pipe,
void *state)
{
@@ -615,7 +661,7 @@ trace_context_bind_vertex_elements_state(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_delete_vertex_elements_state(struct pipe_context *_pipe,
void *state)
{
@@ -633,7 +679,7 @@ trace_context_delete_vertex_elements_state(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_set_blend_color(struct pipe_context *_pipe,
const struct pipe_blend_color *state)
{
@@ -651,7 +697,7 @@ trace_context_set_blend_color(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_set_stencil_ref(struct pipe_context *_pipe,
const struct pipe_stencil_ref *state)
{
@@ -669,7 +715,7 @@ trace_context_set_stencil_ref(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_set_clip_state(struct pipe_context *_pipe,
const struct pipe_clip_state *state)
{
@@ -686,7 +732,7 @@ trace_context_set_clip_state(struct pipe_context *_pipe,
trace_dump_call_end();
}
-static inline void
+static void
trace_context_set_sample_mask(struct pipe_context *_pipe,
unsigned sample_mask)
{
@@ -703,7 +749,7 @@ trace_context_set_sample_mask(struct pipe_context *_pipe,
trace_dump_call_end();
}
-static inline void
+static void
trace_context_set_constant_buffer(struct pipe_context *_pipe,
uint shader, uint index,
struct pipe_constant_buffer *constant_buffer)
@@ -731,7 +777,7 @@ trace_context_set_constant_buffer(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_set_framebuffer_state(struct pipe_context *_pipe,
const struct pipe_framebuffer_state *state)
{
@@ -743,9 +789,9 @@ trace_context_set_framebuffer_state(struct pipe_context *_pipe,
/* Unwrap the input state */
memcpy(&unwrapped_state, state, sizeof(unwrapped_state));
- for(i = 0; i < state->nr_cbufs; ++i)
+ for (i = 0; i < state->nr_cbufs; ++i)
unwrapped_state.cbufs[i] = trace_surface_unwrap(tr_ctx, state->cbufs[i]);
- for(i = state->nr_cbufs; i < PIPE_MAX_COLOR_BUFS; ++i)
+ for (i = state->nr_cbufs; i < PIPE_MAX_COLOR_BUFS; ++i)
unwrapped_state.cbufs[i] = NULL;
unwrapped_state.zsbuf = trace_surface_unwrap(tr_ctx, state->zsbuf);
state = &unwrapped_state;
@@ -761,7 +807,7 @@ trace_context_set_framebuffer_state(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_set_polygon_stipple(struct pipe_context *_pipe,
const struct pipe_poly_stipple *state)
{
@@ -779,7 +825,7 @@ trace_context_set_polygon_stipple(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_set_scissor_states(struct pipe_context *_pipe,
unsigned start_slot,
unsigned num_scissors,
@@ -801,7 +847,7 @@ trace_context_set_scissor_states(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_set_viewport_states(struct pipe_context *_pipe,
unsigned start_slot,
unsigned num_viewports,
@@ -825,8 +871,8 @@ trace_context_set_viewport_states(struct pipe_context *_pipe,
static struct pipe_sampler_view *
trace_context_create_sampler_view(struct pipe_context *_pipe,
- struct pipe_resource *_resource,
- const struct pipe_sampler_view *templ)
+ struct pipe_resource *_resource,
+ const struct pipe_sampler_view *templ)
{
struct trace_context *tr_ctx = trace_context(_pipe);
struct trace_resource *tr_res = trace_resource(_resource);
@@ -868,7 +914,7 @@ trace_context_create_sampler_view(struct pipe_context *_pipe,
static void
trace_context_sampler_view_destroy(struct pipe_context *_pipe,
- struct pipe_sampler_view *_view)
+ struct pipe_sampler_view *_view)
{
struct trace_context *tr_ctx = trace_context(_pipe);
struct trace_sampler_view *tr_view = trace_sampler_view(_view);
@@ -910,7 +956,7 @@ trace_context_create_surface(struct pipe_context *_pipe,
trace_dump_arg(ptr, pipe);
trace_dump_arg(ptr, resource);
-
+
trace_dump_arg_begin("surf_tmpl");
trace_dump_surface_template(surf_tmpl, resource->target);
trace_dump_arg_end();
@@ -948,7 +994,7 @@ trace_context_surface_destroy(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_set_sampler_views(struct pipe_context *_pipe,
unsigned shader,
unsigned start,
@@ -964,7 +1010,7 @@ trace_context_set_sampler_views(struct pipe_context *_pipe,
/* remove this when we have pipe->set_sampler_views(..., start, ...) */
assert(start == 0);
- for(i = 0; i < num; ++i) {
+ for (i = 0; i < num; ++i) {
tr_view = trace_sampler_view(views[i]);
unwrapped_views[i] = tr_view ? tr_view->sampler_view : NULL;
}
@@ -984,7 +1030,7 @@ trace_context_set_sampler_views(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_set_vertex_buffers(struct pipe_context *_pipe,
unsigned start_slot, unsigned num_buffers,
const struct pipe_vertex_buffer *buffers)
@@ -1018,7 +1064,7 @@ trace_context_set_vertex_buffers(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_set_index_buffer(struct pipe_context *_pipe,
const struct pipe_index_buffer *ib)
{
@@ -1043,7 +1089,7 @@ trace_context_set_index_buffer(struct pipe_context *_pipe,
}
-static inline struct pipe_stream_output_target *
+static struct pipe_stream_output_target *
trace_context_create_stream_output_target(struct pipe_context *_pipe,
struct pipe_resource *res,
unsigned buffer_offset,
@@ -1073,7 +1119,7 @@ trace_context_create_stream_output_target(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_stream_output_target_destroy(
struct pipe_context *_pipe,
struct pipe_stream_output_target *target)
@@ -1092,7 +1138,7 @@ trace_context_stream_output_target_destroy(
}
-static inline void
+static void
trace_context_set_stream_output_targets(struct pipe_context *_pipe,
unsigned num_targets,
struct pipe_stream_output_target **tgs,
@@ -1114,7 +1160,7 @@ trace_context_set_stream_output_targets(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_resource_copy_region(struct pipe_context *_pipe,
struct pipe_resource *dst,
unsigned dst_level,
@@ -1149,7 +1195,7 @@ trace_context_resource_copy_region(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_blit(struct pipe_context *_pipe,
const struct pipe_blit_info *_info)
{
@@ -1191,7 +1237,7 @@ trace_context_flush_resource(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_clear(struct pipe_context *_pipe,
unsigned buffers,
const union pipe_color_union *color,
@@ -1220,7 +1266,7 @@ trace_context_clear(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_clear_render_target(struct pipe_context *_pipe,
struct pipe_surface *dst,
const union pipe_color_union *color,
@@ -1247,7 +1293,7 @@ trace_context_clear_render_target(struct pipe_context *_pipe,
trace_dump_call_end();
}
-static inline void
+static void
trace_context_clear_depth_stencil(struct pipe_context *_pipe,
struct pipe_surface *dst,
unsigned clear_flags,
@@ -1306,7 +1352,7 @@ trace_context_clear_texture(struct pipe_context *_pipe,
trace_dump_call_end();
}
-static inline void
+static void
trace_context_flush(struct pipe_context *_pipe,
struct pipe_fence_handle **fence,
unsigned flags)
@@ -1364,7 +1410,7 @@ trace_context_generate_mipmap(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_destroy(struct pipe_context *_pipe)
{
struct trace_context *tr_ctx = trace_context(_pipe);
@@ -1414,7 +1460,7 @@ trace_context_transfer_map(struct pipe_context *_context,
*transfer = trace_transfer_create(tr_context, tr_res, result);
if (map) {
- if(usage & PIPE_TRANSFER_WRITE) {
+ if (usage & PIPE_TRANSFER_WRITE) {
trace_transfer(*transfer)->map = map;
}
}
@@ -1432,9 +1478,7 @@ trace_context_transfer_flush_region( struct pipe_context *_context,
struct pipe_context *context = tr_context->pipe;
struct pipe_transfer *transfer = tr_transfer->transfer;
- context->transfer_flush_region(context,
- transfer,
- box);
+ context->transfer_flush_region(context, transfer, box);
}
static void
@@ -1446,7 +1490,7 @@ trace_context_transfer_unmap(struct pipe_context *_context,
struct pipe_context *context = tr_ctx->pipe;
struct pipe_transfer *transfer = tr_trans->transfer;
- if(tr_trans->map) {
+ if (tr_trans->map) {
/*
* Fake a transfer_inline_write
*/
@@ -1525,15 +1569,16 @@ trace_context_transfer_inline_write(struct pipe_context *_context,
trace_dump_call_end();
- context->transfer_inline_write(context, resource,
- level, usage, box, data, stride, layer_stride);
+ context->transfer_inline_write(context, resource, level, usage, box,
+ data, stride, layer_stride);
}
-static void trace_context_render_condition(struct pipe_context *_context,
- struct pipe_query *query,
- boolean condition,
- uint mode)
+static void
+trace_context_render_condition(struct pipe_context *_context,
+ struct pipe_query *query,
+ boolean condition,
+ uint mode)
{
struct trace_context *tr_context = trace_context(_context);
struct pipe_context *context = tr_context->pipe;
@@ -1553,7 +1598,8 @@ static void trace_context_render_condition(struct pipe_context *_context,
}
-static void trace_context_texture_barrier(struct pipe_context *_context)
+static void
+trace_context_texture_barrier(struct pipe_context *_context)
{
struct trace_context *tr_context = trace_context(_context);
struct pipe_context *context = tr_context->pipe;
@@ -1568,8 +1614,9 @@ static void trace_context_texture_barrier(struct pipe_context *_context)
}
-static void trace_context_memory_barrier(struct pipe_context *_context,
- unsigned flags)
+static void
+trace_context_memory_barrier(struct pipe_context *_context,
+ unsigned flags)
{
struct trace_context *tr_context = trace_context(_context);
struct pipe_context *context = tr_context->pipe;
@@ -1583,9 +1630,10 @@ static void trace_context_memory_barrier(struct pipe_context *_context,
}
-static void trace_context_set_tess_state(struct pipe_context *_context,
- const float default_outer_level[4],
- const float default_inner_level[2])
+static void
+trace_context_set_tess_state(struct pipe_context *_context,
+ const float default_outer_level[4],
+ const float default_inner_level[2])
{
struct trace_context *tr_context = trace_context(_context);
struct pipe_context *context = tr_context->pipe;
@@ -1638,12 +1686,31 @@ static void trace_context_set_shader_buffers(struct pipe_context *_context,
FREE(_buffers);
}
+static void trace_context_launch_grid(struct pipe_context *_pipe,
+ const struct pipe_grid_info *info)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "launch_grid");
-static const struct debug_named_value rbug_blocker_flags[] = {
- {"before", 1, NULL},
- {"after", 2, NULL},
- DEBUG_NAMED_VALUE_END
-};
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(grid_info, info);
+
+ trace_dump_trace_flush();
+
+ if (info->indirect) {
+ struct pipe_grid_info _info;
+
+ memcpy(&_info, info, sizeof(_info));
+ _info.indirect = trace_resource_unwrap(tr_ctx, _info.indirect);
+ pipe->launch_grid(pipe, &_info);
+ } else {
+ pipe->launch_grid(pipe, info);
+ }
+
+ trace_dump_call_end();
+}
struct pipe_context *
trace_context_create(struct trace_screen *tr_scr,
@@ -1654,7 +1721,7 @@ trace_context_create(struct trace_screen *tr_scr,
if (!pipe)
goto error1;
- if(!trace_enabled())
+ if (!trace_enabled())
goto error1;
tr_ctx = CALLOC_STRUCT(trace_context);
@@ -1703,6 +1770,9 @@ trace_context_create(struct trace_screen *tr_scr,
TR_CTX_INIT(create_tes_state);
TR_CTX_INIT(bind_tes_state);
TR_CTX_INIT(delete_tes_state);
+ TR_CTX_INIT(create_compute_state);
+ TR_CTX_INIT(bind_compute_state);
+ TR_CTX_INIT(delete_compute_state);
TR_CTX_INIT(create_vertex_elements_state);
TR_CTX_INIT(bind_vertex_elements_state);
TR_CTX_INIT(delete_vertex_elements_state);
@@ -1738,6 +1808,7 @@ trace_context_create(struct trace_screen *tr_scr,
TR_CTX_INIT(memory_barrier);
TR_CTX_INIT(set_tess_state);
TR_CTX_INIT(set_shader_buffers);
+ TR_CTX_INIT(launch_grid);
TR_CTX_INIT(transfer_map);
TR_CTX_INIT(transfer_unmap);
@@ -1756,7 +1827,7 @@ error1:
/**
- * Sanity checker: check that the given context really is a
+ * Sanity checker: check that the given context really is a
* trace context (and not the wrapped driver's context).
*/
void
@@ -1765,4 +1836,3 @@ trace_context_check(const struct pipe_context *pipe)
struct trace_context *tr_ctx = (struct trace_context *) pipe;
assert(tr_ctx->base.destroy == trace_context_destroy);
}
-
diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c
index cfbf53cf767..0627e5ab5d7 100644
--- a/src/gallium/drivers/trace/tr_dump_state.c
+++ b/src/gallium/drivers/trace/tr_dump_state.c
@@ -305,6 +305,36 @@ void trace_dump_shader_state(const struct pipe_shader_state *state)
}
+/**
+ * Dump a pipe_compute_state as a "pipe_compute_state" struct.
+ *
+ * Emits the "prog" member (as the text produced by tgsi_dump_str(), or a
+ * null when prog is NULL) followed by req_local_mem, req_private_mem and
+ * req_input_mem. Does nothing when trace_dumping_enabled_locked() is false;
+ * dumps a null when \a state is NULL.
+ */
+void trace_dump_compute_state(const struct pipe_compute_state *state)
+{
+ if (!trace_dumping_enabled_locked())
+ return;
+
+ if (!state) {
+ trace_dump_null();
+ return;
+ }
+
+ trace_dump_struct_begin("pipe_compute_state");
+
+ trace_dump_member_begin("prog");
+ if (state->prog) {
+ /* Static scratch buffer: one 64 KiB array shared by every call.
+ * NOTE(review): prog is handed to tgsi_dump_str() unconditionally;
+ * confirm it always holds TGSI tokens for the drivers being traced. */
+ static char str[64 * 1024];
+ tgsi_dump_str(state->prog, 0, str, sizeof(str));
+ trace_dump_string(str);
+ } else {
+ trace_dump_null();
+ }
+ trace_dump_member_end();
+
+ trace_dump_member(uint, state, req_local_mem);
+ trace_dump_member(uint, state, req_private_mem);
+ trace_dump_member(uint, state, req_input_mem);
+
+ trace_dump_struct_end();
+}
+
+
void trace_dump_depth_stencil_alpha_state(const struct pipe_depth_stencil_alpha_state *state)
{
unsigned i;
@@ -864,3 +894,33 @@ trace_dump_query_result(unsigned query_type,
break;
}
}
+
+/**
+ * Dump a pipe_grid_info as a "pipe_grid_info" struct.
+ *
+ * Members are written in the order pc, input, block, grid, indirect,
+ * indirect_offset. input and indirect are dumped as pointers only; the
+ * memory they refer to is not dumped. Does nothing when
+ * trace_dumping_enabled_locked() is false; dumps a null when \a state is
+ * NULL.
+ */
+void trace_dump_grid_info(const struct pipe_grid_info *state)
+{
+ if (!trace_dumping_enabled_locked())
+ return;
+
+ if (!state) {
+ trace_dump_null();
+ return;
+ }
+
+ trace_dump_struct_begin("pipe_grid_info");
+
+ trace_dump_member(uint, state, pc);
+ trace_dump_member(ptr, state, input);
+
+ /* block and grid are arrays, so each is dumped as an array member with
+ * its element count taken from the array itself. */
+ trace_dump_member_begin("block");
+ trace_dump_array(uint, state->block, Elements(state->block));
+ trace_dump_member_end();
+
+ trace_dump_member_begin("grid");
+ trace_dump_array(uint, state->grid, Elements(state->grid));
+ trace_dump_member_end();
+
+ trace_dump_member(ptr, state, indirect);
+ trace_dump_member(uint, state, indirect_offset);
+
+ trace_dump_struct_end();
+}
+
diff --git a/src/gallium/drivers/trace/tr_dump_state.h b/src/gallium/drivers/trace/tr_dump_state.h
index 4f4ade155bc..ee0720d8ac8 100644
--- a/src/gallium/drivers/trace/tr_dump_state.h
+++ b/src/gallium/drivers/trace/tr_dump_state.h
@@ -50,6 +50,8 @@ void trace_dump_token(const struct tgsi_token *token);
void trace_dump_shader_state(const struct pipe_shader_state *state);
+void trace_dump_compute_state(const struct pipe_compute_state *state);
+
void trace_dump_depth_stencil_alpha_state(const struct pipe_depth_stencil_alpha_state *state);
void trace_dump_blend_state(const struct pipe_blend_state *state);
@@ -87,4 +89,6 @@ void trace_dump_blit_info(const struct pipe_blit_info *);
void trace_dump_query_result(unsigned query_type,
const union pipe_query_result *result);
+void trace_dump_grid_info(const struct pipe_grid_info *state);
+
#endif /* TR_STATE_H */
diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c
index ff057e2a4a4..0612109c800 100644
--- a/src/gallium/drivers/trace/tr_screen.c
+++ b/src/gallium/drivers/trace/tr_screen.c
@@ -173,6 +173,30 @@ trace_screen_get_paramf(struct pipe_screen *_screen,
}
+/**
+ * Tracing wrapper for pipe_screen::get_compute_param.
+ *
+ * Records the wrapped screen pointer, the requested cap (as an int) and the
+ * \a data pointer, forwards the query to the wrapped screen, records the
+ * integer result and returns it unchanged. Only the address of \a data is
+ * dumped, not what the wrapped screen writes through it.
+ */
+static int
+trace_screen_get_compute_param(struct pipe_screen *_screen,
+ enum pipe_compute_cap param, void *data)
+{
+ struct trace_screen *tr_scr = trace_screen(_screen);
+ struct pipe_screen *screen = tr_scr->screen;
+ int result;
+
+ trace_dump_call_begin("pipe_screen", "get_compute_param");
+
+ trace_dump_arg(ptr, screen);
+ trace_dump_arg(int, param);
+ trace_dump_arg(ptr, data);
+
+ result = screen->get_compute_param(screen, param, data);
+
+ trace_dump_ret(int, result);
+
+ trace_dump_call_end();
+
+ return result;
+}
+
+
static boolean
trace_screen_is_format_supported(struct pipe_screen *_screen,
enum pipe_format format,
@@ -472,6 +496,7 @@ trace_screen_create(struct pipe_screen *screen)
tr_scr->base.get_param = trace_screen_get_param;
tr_scr->base.get_shader_param = trace_screen_get_shader_param;
tr_scr->base.get_paramf = trace_screen_get_paramf;
+ tr_scr->base.get_compute_param = trace_screen_get_compute_param;
tr_scr->base.is_format_supported = trace_screen_is_format_supported;
assert(screen->context_create);
tr_scr->base.context_create = trace_screen_context_create;
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index efbb69b71a7..f9eb0e151c5 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -193,6 +193,7 @@ qir_is_raw_mov(struct qinst *inst)
return ((inst->op == QOP_MOV ||
inst->op == QOP_FMOV ||
inst->op == QOP_MMOV) &&
+ inst->cond == QPU_COND_ALWAYS &&
!inst->dst.pack &&
!inst->src[0].pack);
}
diff --git a/src/gallium/drivers/vc4/vc4_qir_schedule.c b/src/gallium/drivers/vc4/vc4_qir_schedule.c
index 2f280c54523..ee1e9aafbb9 100644
--- a/src/gallium/drivers/vc4/vc4_qir_schedule.c
+++ b/src/gallium/drivers/vc4/vc4_qir_schedule.c
@@ -575,7 +575,7 @@ void
qir_schedule_instructions(struct vc4_compile *c)
{
void *mem_ctx = ralloc_context(NULL);
- struct schedule_state state = { 0 };
+ struct schedule_state state = { { 0 } };
if (debug) {
fprintf(stderr, "Pre-schedule instructions\n");
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index b06702afea2..450b97fc014 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -312,7 +312,7 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
break;
}
- bool handled_qinst_cond = true;
+ bool handled_qinst_cond = false;
switch (qinst->op) {
case QOP_RCP:
diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c
index b19d31af6ac..a4b3efcfda3 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -357,9 +357,12 @@ vc4_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
return VC4_MAX_TEXTURE_SAMPLERS;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
default:
fprintf(stderr, "unknown shader param %d\n", param);
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h
index 6c95b7b2178..ee68fdd6f6f 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -48,6 +48,7 @@ struct pipe_constant_buffer;
struct pipe_debug_callback;
struct pipe_depth_stencil_alpha_state;
struct pipe_draw_info;
+struct pipe_grid_info;
struct pipe_fence_handle;
struct pipe_framebuffer_state;
struct pipe_image_view;
@@ -312,14 +313,14 @@ struct pipe_context {
* \param shader selects shader stage
* \param start_slot first image slot to bind.
* \param count number of consecutive images to bind.
- * \param buffers array of pointers to the images to bind, it
+ * \param buffers array of the images to bind, it
* should contain at least \a count elements
* unless it's NULL, in which case no images will
* be bound.
*/
void (*set_shader_images)(struct pipe_context *, unsigned shader,
unsigned start_slot, unsigned count,
- struct pipe_image_view **images);
+ struct pipe_image_view *images);
void (*set_vertex_buffers)( struct pipe_context *,
unsigned start_slot,
@@ -477,16 +478,6 @@ struct pipe_context {
void (*surface_destroy)(struct pipe_context *ctx,
struct pipe_surface *);
- /**
- * Create an image view into a buffer or texture to be used with load,
- * store, and atomic instructions by a shader stage.
- */
- struct pipe_image_view * (*create_image_view)(struct pipe_context *ctx,
- struct pipe_resource *texture,
- const struct pipe_image_view *templat);
-
- void (*image_view_destroy)(struct pipe_context *ctx,
- struct pipe_image_view *view);
/**
* Map a resource.
@@ -618,23 +609,9 @@ struct pipe_context {
/**
* Launch the compute kernel starting from instruction \a pc of the
* currently bound compute program.
- *
- * \a grid_layout and \a block_layout are arrays of size \a
- * PIPE_COMPUTE_CAP_GRID_DIMENSION that determine the layout of the
- * grid (in block units) and working block (in thread units) to be
- * used, respectively.
- *
- * \a pc For drivers that use PIPE_SHADER_IR_LLVM as their prefered IR,
- * this value will be the index of the kernel in the opencl.kernels
- * metadata list.
- *
- * \a input will be used to initialize the INPUT resource, and it
- * should point to a buffer of at least
- * pipe_compute_state::req_input_mem bytes.
*/
void (*launch_grid)(struct pipe_context *context,
- const uint *block_layout, const uint *grid_layout,
- uint32_t pc, const void *input);
+ const struct pipe_grid_info *info);
/*@}*/
/**
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index b01f6ea3dcb..010be62e638 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -725,6 +725,8 @@ enum pipe_shader_cap
PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE,
PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT,
PIPE_SHADER_CAP_MAX_SHADER_BUFFERS,
+ PIPE_SHADER_CAP_SUPPORTED_IRS,
+ PIPE_SHADER_CAP_MAX_SHADER_IMAGES,
};
/**
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index 6539017b77c..9d4a96a5a7e 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -79,6 +79,7 @@ enum tgsi_file_type {
TGSI_FILE_IMAGE =10,
TGSI_FILE_SAMPLER_VIEW =11,
TGSI_FILE_BUFFER =12,
+ TGSI_FILE_MEMORY =13,
TGSI_FILE_COUNT /**< how many TGSI_FILE_ types */
};
@@ -129,7 +130,8 @@ struct tgsi_declaration
unsigned Local : 1; /**< optimize as subroutine local variable? */
unsigned Array : 1; /**< extra array info? */
unsigned Atomic : 1; /**< atomic only? for TGSI_FILE_BUFFER */
- unsigned Padding : 5;
+ unsigned Shared : 1; /**< shared storage for TGSI_FILE_MEMORY */
+ unsigned Padding : 4;
};
struct tgsi_declaration_range
diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h
index ed62a33ad72..c568c483940 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -393,14 +393,12 @@ struct pipe_sampler_view
/**
- * A view into a writable buffer or texture that can be bound to a shader
+ * A description of a writable buffer or texture that can be bound to a shader
* stage.
*/
struct pipe_image_view
{
- struct pipe_reference reference;
struct pipe_resource *resource; /**< resource into which this is a view */
- struct pipe_context *context; /**< context this view belongs to */
enum pipe_format format; /**< typed PIPE_FORMAT_x */
union {
@@ -678,6 +676,45 @@ struct pipe_blit_info
boolean alpha_blend; /* dst.rgb = src.rgb * src.a + dst.rgb * (1 - src.a) */
};
+/**
+ * Information to describe a launch_grid call.
+ */
+struct pipe_grid_info
+{
+ /**
+ * For drivers that use PIPE_SHADER_IR_LLVM as their preferred IR, this
+ * value will be the index of the kernel in the opencl.kernels metadata
+ * list.
+ */
+ uint32_t pc;
+
+ /**
+ * Will be used to initialize the INPUT resource, and it should point to a
+ * buffer of at least pipe_compute_state::req_input_mem bytes.
+ */
+ void *input;
+
+ /**
+ * Determine the layout of the working block (in thread units) to be used.
+ */
+ uint block[3];
+
+ /**
+ * Determine the layout of the grid (in block units) to be used.
+ */
+ uint grid[3];
+
+ /* Indirect compute parameters resource: If not NULL, the number of blocks
+ * in each grid dimension is taken from this buffer instead, which is laid
+ * out as follows:
+ *
+ * struct {
+ * uint32_t num_blocks_x;
+ * uint32_t num_blocks_y;
+ * uint32_t num_blocks_z;
+ * };
+ */
+ struct pipe_resource *indirect;
+ unsigned indirect_offset; /**< must be 4 byte aligned */
+};
/**
* Structure used as a header for serialized LLVM programs.
diff --git a/src/gallium/state_trackers/clover/core/kernel.cpp b/src/gallium/state_trackers/clover/core/kernel.cpp
index a226ec1a752..8396be91553 100644
--- a/src/gallium/state_trackers/clover/core/kernel.cpp
+++ b/src/gallium/state_trackers/clover/core/kernel.cpp
@@ -55,6 +55,7 @@ kernel::launch(command_queue &q,
const auto reduced_grid_size =
map(divides(), grid_size, block_size);
void *st = exec.bind(&q, grid_offset);
+ struct pipe_grid_info info = {}; // zero-init: indirect and indirect_offset are never assigned before launch_grid()
// The handles are created during exec_context::bind(), so we need make
// sure to call exec_context::bind() before retrieving them.
@@ -74,11 +75,13 @@ kernel::launch(command_queue &q,
q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(),
exec.g_buffers.data(), g_handles.data());
- q.pipe->launch_grid(q.pipe,
- pad_vector(q, block_size, 1).data(),
- pad_vector(q, reduced_grid_size, 1).data(),
- find(name_equals(_name), m.syms).offset,
- exec.input.data());
+ // Fill information for the launch_grid() call.
+ copy(pad_vector(q, block_size, 1), info.block);
+ copy(pad_vector(q, reduced_grid_size, 1), info.grid);
+ info.pc = find(name_equals(_name), m.syms).offset;
+ info.input = exec.input.data();
+
+ q.pipe->launch_grid(q.pipe, &info);
q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), NULL, NULL);
q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(), NULL);
diff --git a/src/gallium/state_trackers/nine/Makefile.sources b/src/gallium/state_trackers/nine/Makefile.sources
index 8d178d4b18f..05eb1655376 100644
--- a/src/gallium/state_trackers/nine/Makefile.sources
+++ b/src/gallium/state_trackers/nine/Makefile.sources
@@ -32,6 +32,7 @@ C_SOURCES := \
nineexoverlayextension.h \
nine_ff.c \
nine_ff.h \
+ nine_flags.h \
nine_helpers.c \
nine_helpers.h \
nine_lock.c \
diff --git a/src/gallium/state_trackers/nine/adapter9.c b/src/gallium/state_trackers/nine/adapter9.c
index 8428b1bd7eb..48e1e44c1bf 100644
--- a/src/gallium/state_trackers/nine/adapter9.c
+++ b/src/gallium/state_trackers/nine/adapter9.c
@@ -146,7 +146,7 @@ NineAdapter9_GetScreen( struct NineAdapter9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineAdapter9_GetAdapterIdentifier( struct NineAdapter9 *This,
DWORD Flags,
D3DADAPTER_IDENTIFIER9 *pIdentifier )
@@ -182,7 +182,7 @@ backbuffer_format( D3DFORMAT dfmt,
return FALSE;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineAdapter9_CheckDeviceType( struct NineAdapter9 *This,
D3DDEVTYPE DevType,
D3DFORMAT AdapterFormat,
@@ -207,11 +207,11 @@ NineAdapter9_CheckDeviceType( struct NineAdapter9 *This,
dfmt = d3d9_to_pipe_format_checked(screen, AdapterFormat, PIPE_TEXTURE_2D,
1,
PIPE_BIND_DISPLAY_TARGET |
- PIPE_BIND_SHARED, FALSE);
+ PIPE_BIND_SHARED, FALSE, FALSE);
bfmt = d3d9_to_pipe_format_checked(screen, BackBufferFormat, PIPE_TEXTURE_2D,
1,
PIPE_BIND_DISPLAY_TARGET |
- PIPE_BIND_SHARED, FALSE);
+ PIPE_BIND_SHARED, FALSE, FALSE);
if (dfmt == PIPE_FORMAT_NONE || bfmt == PIPE_FORMAT_NONE) {
DBG("Unsupported Adapter/BackBufferFormat.\n");
return D3DERR_NOTAVAILABLE;
@@ -241,7 +241,7 @@ display_format( D3DFORMAT fmt,
return FALSE;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineAdapter9_CheckDeviceFormat( struct NineAdapter9 *This,
D3DDEVTYPE DeviceType,
D3DFORMAT AdapterFormat,
@@ -270,7 +270,7 @@ NineAdapter9_CheckDeviceFormat( struct NineAdapter9 *This,
return hr;
pf = d3d9_to_pipe_format_checked(screen, AdapterFormat, PIPE_TEXTURE_2D, 0,
PIPE_BIND_DISPLAY_TARGET |
- PIPE_BIND_SHARED, FALSE);
+ PIPE_BIND_SHARED, FALSE, FALSE);
if (pf == PIPE_FORMAT_NONE) {
DBG("AdapterFormat %s not available.\n",
d3dformat_to_string(AdapterFormat));
@@ -332,14 +332,16 @@ NineAdapter9_CheckDeviceFormat( struct NineAdapter9 *This,
srgb = (Usage & (D3DUSAGE_QUERY_SRGBREAD | D3DUSAGE_QUERY_SRGBWRITE)) != 0;
- pf = d3d9_to_pipe_format_checked(screen, CheckFormat, target, 0, bind, srgb);
+ pf = d3d9_to_pipe_format_checked(screen, CheckFormat, target,
+ 0, bind, srgb, FALSE);
if (pf == PIPE_FORMAT_NONE) {
DBG("NOT AVAILABLE\n");
return D3DERR_NOTAVAILABLE;
}
- /* we support ATI1 and ATI2 hack only for 2D textures */
- if (RType != D3DRTYPE_TEXTURE && (CheckFormat == D3DFMT_ATI1 || CheckFormat == D3DFMT_ATI2))
+ /* we support ATI1 and ATI2 hack only for 2D and Cube textures */
+ if (RType != D3DRTYPE_TEXTURE && RType != D3DRTYPE_CUBETEXTURE &&
+ (CheckFormat == D3DFMT_ATI1 || CheckFormat == D3DFMT_ATI2))
return D3DERR_NOTAVAILABLE;
/* if (Usage & D3DUSAGE_NONSECURE) { don't know the implications of this } */
/* if (Usage & D3DUSAGE_SOFTWAREPROCESSING) { we can always support this } */
@@ -349,7 +351,7 @@ NineAdapter9_CheckDeviceFormat( struct NineAdapter9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineAdapter9_CheckDeviceMultiSampleType( struct NineAdapter9 *This,
D3DDEVTYPE DeviceType,
D3DFORMAT SurfaceFormat,
@@ -378,7 +380,7 @@ NineAdapter9_CheckDeviceMultiSampleType( struct NineAdapter9 *This,
PIPE_BIND_TRANSFER_WRITE | PIPE_BIND_RENDER_TARGET;
pf = d3d9_to_pipe_format_checked(screen, SurfaceFormat, PIPE_TEXTURE_2D,
- MultiSampleType, bind, FALSE);
+ MultiSampleType, bind, FALSE, FALSE);
if (pf == PIPE_FORMAT_NONE) {
DBG("%s with %u samples not available.\n",
@@ -392,7 +394,7 @@ NineAdapter9_CheckDeviceMultiSampleType( struct NineAdapter9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineAdapter9_CheckDepthStencilMatch( struct NineAdapter9 *This,
D3DDEVTYPE DeviceType,
D3DFORMAT AdapterFormat,
@@ -417,16 +419,16 @@ NineAdapter9_CheckDepthStencilMatch( struct NineAdapter9 *This,
dfmt = d3d9_to_pipe_format_checked(screen, AdapterFormat, PIPE_TEXTURE_2D, 0,
PIPE_BIND_DISPLAY_TARGET |
- PIPE_BIND_SHARED, FALSE);
+ PIPE_BIND_SHARED, FALSE, FALSE);
bfmt = d3d9_to_pipe_format_checked(screen, RenderTargetFormat,
PIPE_TEXTURE_2D, 0,
- PIPE_BIND_RENDER_TARGET, FALSE);
+ PIPE_BIND_RENDER_TARGET, FALSE, FALSE);
if (RenderTargetFormat == D3DFMT_NULL)
bfmt = dfmt;
zsfmt = d3d9_to_pipe_format_checked(screen, DepthStencilFormat,
PIPE_TEXTURE_2D, 0,
d3d9_get_pipe_depth_format_bindings(DepthStencilFormat),
- FALSE);
+ FALSE, FALSE);
if (dfmt == PIPE_FORMAT_NONE ||
bfmt == PIPE_FORMAT_NONE ||
zsfmt == PIPE_FORMAT_NONE) {
@@ -436,7 +438,7 @@ NineAdapter9_CheckDepthStencilMatch( struct NineAdapter9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineAdapter9_CheckDeviceFormatConversion( struct NineAdapter9 *This,
D3DDEVTYPE DeviceType,
D3DFORMAT SourceFormat,
@@ -461,10 +463,10 @@ NineAdapter9_CheckDeviceFormatConversion( struct NineAdapter9 *This,
dfmt = d3d9_to_pipe_format_checked(screen, TargetFormat, PIPE_TEXTURE_2D, 1,
PIPE_BIND_DISPLAY_TARGET |
- PIPE_BIND_SHARED, FALSE);
+ PIPE_BIND_SHARED, FALSE, FALSE);
bfmt = d3d9_to_pipe_format_checked(screen, SourceFormat, PIPE_TEXTURE_2D, 1,
PIPE_BIND_DISPLAY_TARGET |
- PIPE_BIND_SHARED, FALSE);
+ PIPE_BIND_SHARED, FALSE, FALSE);
if (dfmt == PIPE_FORMAT_NONE || bfmt == PIPE_FORMAT_NONE) {
DBG("%s to %s not supported.\n",
@@ -476,7 +478,7 @@ NineAdapter9_CheckDeviceFormatConversion( struct NineAdapter9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineAdapter9_GetDeviceCaps( struct NineAdapter9 *This,
D3DDEVTYPE DeviceType,
D3DCAPS9 *pCaps )
@@ -932,7 +934,7 @@ NineAdapter9_GetDeviceCaps( struct NineAdapter9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineAdapter9_CreateDevice( struct NineAdapter9 *This,
UINT RealAdapter,
D3DDEVTYPE DeviceType,
@@ -992,7 +994,7 @@ NineAdapter9_CreateDevice( struct NineAdapter9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineAdapter9_CreateDeviceEx( struct NineAdapter9 *This,
UINT RealAdapter,
D3DDEVTYPE DeviceType,
diff --git a/src/gallium/state_trackers/nine/adapter9.h b/src/gallium/state_trackers/nine/adapter9.h
index 2129ec8edc0..aaf7435fcda 100644
--- a/src/gallium/state_trackers/nine/adapter9.h
+++ b/src/gallium/state_trackers/nine/adapter9.h
@@ -67,19 +67,19 @@ NineAdapter9_ctor( struct NineAdapter9 *This,
void
NineAdapter9_dtor( struct NineAdapter9 *This );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineAdapter9_GetAdapterIdentifier( struct NineAdapter9 *This,
DWORD Flags,
D3DADAPTER_IDENTIFIER9 *pIdentifier );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineAdapter9_CheckDeviceType( struct NineAdapter9 *This,
D3DDEVTYPE DevType,
D3DFORMAT AdapterFormat,
D3DFORMAT BackBufferFormat,
BOOL bWindowed );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineAdapter9_CheckDeviceFormat( struct NineAdapter9 *This,
D3DDEVTYPE DeviceType,
D3DFORMAT AdapterFormat,
@@ -87,7 +87,7 @@ NineAdapter9_CheckDeviceFormat( struct NineAdapter9 *This,
D3DRESOURCETYPE RType,
D3DFORMAT CheckFormat );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineAdapter9_CheckDeviceMultiSampleType( struct NineAdapter9 *This,
D3DDEVTYPE DeviceType,
D3DFORMAT SurfaceFormat,
@@ -95,25 +95,25 @@ NineAdapter9_CheckDeviceMultiSampleType( struct NineAdapter9 *This,
D3DMULTISAMPLE_TYPE MultiSampleType,
DWORD *pQualityLevels );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineAdapter9_CheckDepthStencilMatch( struct NineAdapter9 *This,
D3DDEVTYPE DeviceType,
D3DFORMAT AdapterFormat,
D3DFORMAT RenderTargetFormat,
D3DFORMAT DepthStencilFormat );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineAdapter9_CheckDeviceFormatConversion( struct NineAdapter9 *This,
D3DDEVTYPE DeviceType,
D3DFORMAT SourceFormat,
D3DFORMAT TargetFormat );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineAdapter9_GetDeviceCaps( struct NineAdapter9 *This,
D3DDEVTYPE DeviceType,
D3DCAPS9 *pCaps );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineAdapter9_CreateDevice( struct NineAdapter9 *This,
UINT RealAdapter,
D3DDEVTYPE DeviceType,
@@ -124,7 +124,7 @@ NineAdapter9_CreateDevice( struct NineAdapter9 *This,
ID3DPresentGroup *pPresentationGroup,
IDirect3DDevice9 **ppReturnedDeviceInterface );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineAdapter9_CreateDeviceEx( struct NineAdapter9 *This,
UINT RealAdapter,
D3DDEVTYPE DeviceType,
diff --git a/src/gallium/state_trackers/nine/authenticatedchannel9.c b/src/gallium/state_trackers/nine/authenticatedchannel9.c
index 44ad87c956f..45fca3a2975 100644
--- a/src/gallium/state_trackers/nine/authenticatedchannel9.c
+++ b/src/gallium/state_trackers/nine/authenticatedchannel9.c
@@ -24,14 +24,14 @@
#define DBG_CHANNEL DBG_AUTHENTICATEDCHANNEL
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineAuthenticatedChannel9_GetCertificateSize( struct NineAuthenticatedChannel9 *This,
UINT *pCertificateSize )
{
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineAuthenticatedChannel9_GetCertificate( struct NineAuthenticatedChannel9 *This,
UINT CertifacteSize,
BYTE *ppCertificate )
@@ -39,7 +39,7 @@ NineAuthenticatedChannel9_GetCertificate( struct NineAuthenticatedChannel9 *This
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineAuthenticatedChannel9_NegotiateKeyExchange( struct NineAuthenticatedChannel9 *This,
UINT DataSize,
void *pData )
@@ -47,7 +47,7 @@ NineAuthenticatedChannel9_NegotiateKeyExchange( struct NineAuthenticatedChannel9
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineAuthenticatedChannel9_Query( struct NineAuthenticatedChannel9 *This,
UINT InputSize,
const void *pInput,
@@ -57,7 +57,7 @@ NineAuthenticatedChannel9_Query( struct NineAuthenticatedChannel9 *This,
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineAuthenticatedChannel9_Configure( struct NineAuthenticatedChannel9 *This,
UINT InputSize,
const void *pInput,
diff --git a/src/gallium/state_trackers/nine/authenticatedchannel9.h b/src/gallium/state_trackers/nine/authenticatedchannel9.h
index 63cb2269db4..b18848235c8 100644
--- a/src/gallium/state_trackers/nine/authenticatedchannel9.h
+++ b/src/gallium/state_trackers/nine/authenticatedchannel9.h
@@ -35,28 +35,28 @@ NineAuthenticatedChannel9( void *data )
return (struct NineAuthenticatedChannel9 *)data;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineAuthenticatedChannel9_GetCertificateSize( struct NineAuthenticatedChannel9 *This,
UINT *pCertificateSize );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineAuthenticatedChannel9_GetCertificate( struct NineAuthenticatedChannel9 *This,
UINT CertifacteSize,
BYTE *ppCertificate );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineAuthenticatedChannel9_NegotiateKeyExchange( struct NineAuthenticatedChannel9 *This,
UINT DataSize,
void *pData );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineAuthenticatedChannel9_Query( struct NineAuthenticatedChannel9 *This,
UINT InputSize,
const void *pInput,
UINT OutputSize,
void *pOutput );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineAuthenticatedChannel9_Configure( struct NineAuthenticatedChannel9 *This,
UINT InputSize,
const void *pInput,
diff --git a/src/gallium/state_trackers/nine/basetexture9.c b/src/gallium/state_trackers/nine/basetexture9.c
index 7a0959a8f3e..c4eb813e9f8 100644
--- a/src/gallium/state_trackers/nine/basetexture9.c
+++ b/src/gallium/state_trackers/nine/basetexture9.c
@@ -109,7 +109,7 @@ NineBaseTexture9_dtor( struct NineBaseTexture9 *This )
NineResource9_dtor(&This->base);
}
-DWORD WINAPI
+DWORD NINE_WINAPI
NineBaseTexture9_SetLOD( struct NineBaseTexture9 *This,
DWORD LODNew )
{
@@ -130,7 +130,7 @@ NineBaseTexture9_SetLOD( struct NineBaseTexture9 *This,
return old;
}
-DWORD WINAPI
+DWORD NINE_WINAPI
NineBaseTexture9_GetLOD( struct NineBaseTexture9 *This )
{
DBG("This=%p\n", This);
@@ -138,7 +138,7 @@ NineBaseTexture9_GetLOD( struct NineBaseTexture9 *This )
return This->managed.lod;
}
-DWORD WINAPI
+DWORD NINE_WINAPI
NineBaseTexture9_GetLevelCount( struct NineBaseTexture9 *This )
{
DBG("This=%p\n", This);
@@ -148,7 +148,7 @@ NineBaseTexture9_GetLevelCount( struct NineBaseTexture9 *This )
return This->base.info.last_level + 1;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineBaseTexture9_SetAutoGenFilterType( struct NineBaseTexture9 *This,
D3DTEXTUREFILTERTYPE FilterType )
{
@@ -165,7 +165,7 @@ NineBaseTexture9_SetAutoGenFilterType( struct NineBaseTexture9 *This,
return D3D_OK;
}
-D3DTEXTUREFILTERTYPE WINAPI
+D3DTEXTUREFILTERTYPE NINE_WINAPI
NineBaseTexture9_GetAutoGenFilterType( struct NineBaseTexture9 *This )
{
DBG("This=%p\n", This);
@@ -383,7 +383,7 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 *This )
return D3D_OK;
}
-void WINAPI
+void NINE_WINAPI
NineBaseTexture9_GenerateMipSubLevels( struct NineBaseTexture9 *This )
{
struct pipe_resource *resource;
@@ -580,7 +580,7 @@ NineBaseTexture9_UpdateSamplerView( struct NineBaseTexture9 *This,
return This->view ? D3D_OK : D3DERR_DRIVERINTERNALERROR;
}
-void WINAPI
+void NINE_WINAPI
NineBaseTexture9_PreLoad( struct NineBaseTexture9 *This )
{
DBG("This=%p\n", This);
diff --git a/src/gallium/state_trackers/nine/basetexture9.h b/src/gallium/state_trackers/nine/basetexture9.h
index b19a62195fc..980c4a556b2 100644
--- a/src/gallium/state_trackers/nine/basetexture9.h
+++ b/src/gallium/state_trackers/nine/basetexture9.h
@@ -72,27 +72,27 @@ NineBaseTexture9_ctor( struct NineBaseTexture9 *This,
void
NineBaseTexture9_dtor( struct NineBaseTexture9 *This );
-DWORD WINAPI
+DWORD NINE_WINAPI
NineBaseTexture9_SetLOD( struct NineBaseTexture9 *This,
DWORD LODNew );
-DWORD WINAPI
+DWORD NINE_WINAPI
NineBaseTexture9_GetLOD( struct NineBaseTexture9 *This );
-DWORD WINAPI
+DWORD NINE_WINAPI
NineBaseTexture9_GetLevelCount( struct NineBaseTexture9 *This );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineBaseTexture9_SetAutoGenFilterType( struct NineBaseTexture9 *This,
D3DTEXTUREFILTERTYPE FilterType );
-D3DTEXTUREFILTERTYPE WINAPI
+D3DTEXTUREFILTERTYPE NINE_WINAPI
NineBaseTexture9_GetAutoGenFilterType( struct NineBaseTexture9 *This );
-void WINAPI
+void NINE_WINAPI
NineBaseTexture9_GenerateMipSubLevels( struct NineBaseTexture9 *This );
-void WINAPI
+void NINE_WINAPI
NineBaseTexture9_PreLoad( struct NineBaseTexture9 *This );
void
diff --git a/src/gallium/state_trackers/nine/buffer9.c b/src/gallium/state_trackers/nine/buffer9.c
index b4b91ec2a02..e066fc59f45 100644
--- a/src/gallium/state_trackers/nine/buffer9.c
+++ b/src/gallium/state_trackers/nine/buffer9.c
@@ -93,7 +93,26 @@ NineBuffer9_ctor( struct NineBuffer9 *This,
hr = NineResource9_ctor(&This->base, pParams, NULL, TRUE,
Type, Pool, Usage);
- return hr;
+
+ if (FAILED(hr))
+ return hr;
+
+ if (Pool == D3DPOOL_MANAGED) {
+ This->managed.data = align_malloc(
+ nine_format_get_level_alloc_size(This->base.info.format,
+ Size, 1, 0), 32);
+ if (!This->managed.data)
+ return E_OUTOFMEMORY;
+ memset(This->managed.data, 0, Size);
+ This->managed.dirty = TRUE;
+ u_box_1d(0, Size, &This->managed.dirty_box);
+ list_inithead(&This->managed.list);
+ list_inithead(&This->managed.list2);
+ list_add(&This->managed.list, &pParams->device->update_buffers);
+ list_add(&This->managed.list2, &pParams->device->managed_buffers);
+ }
+
+ return D3D_OK;
}
void
@@ -106,6 +125,15 @@ NineBuffer9_dtor( struct NineBuffer9 *This )
FREE(This->maps);
}
+ if (This->base.pool == D3DPOOL_MANAGED) {
+ if (This->managed.data)
+ align_free(This->managed.data);
+ if (This->managed.list.prev != NULL && This->managed.list.next != NULL)
+ list_del(&This->managed.list);
+ if (This->managed.list2.prev != NULL && This->managed.list2.next != NULL)
+ list_del(&This->managed.list2);
+ }
+
NineResource9_dtor(&This->base);
}
@@ -115,7 +143,7 @@ NineBuffer9_GetResource( struct NineBuffer9 *This )
return NineResource9_GetResource(&This->base);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineBuffer9_Lock( struct NineBuffer9 *This,
UINT OffsetToLock,
UINT SizeToLock,
@@ -138,6 +166,28 @@ NineBuffer9_Lock( struct NineBuffer9 *This,
D3DLOCK_READONLY |
D3DLOCK_NOOVERWRITE)), D3DERR_INVALIDCALL);
+ if (SizeToLock == 0) {
+ SizeToLock = This->size - OffsetToLock;
+ user_warn(OffsetToLock != 0);
+ }
+
+ u_box_1d(OffsetToLock, SizeToLock, &box);
+
+ if (This->base.pool == D3DPOOL_MANAGED) {
+ if (!This->managed.dirty) {
+ assert(LIST_IS_EMPTY(&This->managed.list));
+ list_add(&This->managed.list, &This->base.base.device->update_buffers);
+ This->managed.dirty = TRUE;
+ This->managed.dirty_box = box;
+ } else {
+ u_box_union_2d(&This->managed.dirty_box, &This->managed.dirty_box, &box);
+ }
+ *ppbData = (char *)This->managed.data + OffsetToLock;
+ DBG("returning pointer %p\n", *ppbData);
+ This->nmaps++;
+ return D3D_OK;
+ }
+
if (This->nmaps == This->maxmaps) {
struct pipe_transfer **newmaps =
REALLOC(This->maps, sizeof(struct pipe_transfer *)*This->maxmaps,
@@ -149,13 +199,6 @@ NineBuffer9_Lock( struct NineBuffer9 *This,
This->maps = newmaps;
}
- if (SizeToLock == 0) {
- SizeToLock = This->size - OffsetToLock;
- user_warn(OffsetToLock != 0);
- }
-
- u_box_1d(OffsetToLock, SizeToLock, &box);
-
data = This->pipe->transfer_map(This->pipe, This->base.resource, 0,
usage, &box, &This->maps[This->nmaps]);
@@ -178,12 +221,28 @@ NineBuffer9_Lock( struct NineBuffer9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineBuffer9_Unlock( struct NineBuffer9 *This )
{
DBG("This=%p\n", This);
user_assert(This->nmaps > 0, D3DERR_INVALIDCALL);
- This->pipe->transfer_unmap(This->pipe, This->maps[--(This->nmaps)]);
+ if (This->base.pool != D3DPOOL_MANAGED)
+ This->pipe->transfer_unmap(This->pipe, This->maps[--(This->nmaps)]);
+ else
+ This->nmaps--;
return D3D_OK;
}
+
+/* Mark the whole of a managed buffer as dirty.
+ *
+ * Only valid for D3DPOOL_MANAGED buffers (asserted). If the buffer is not
+ * already dirty it is added to the device's update_buffers list and flagged
+ * dirty. In every case the dirty box is reset to cover [0, size). */
+void
+NineBuffer9_SetDirty( struct NineBuffer9 *This )
+{
+ assert(This->base.pool == D3DPOOL_MANAGED);
+
+ if (!This->managed.dirty) {
+ /* A clean buffer is expected not to be linked in any update list. */
+ assert(LIST_IS_EMPTY(&This->managed.list));
+ list_add(&This->managed.list, &This->base.base.device->update_buffers);
+ This->managed.dirty = TRUE;
+ }
+ /* Overwrites any previously accumulated (smaller) dirty range. */
+ u_box_1d(0, This->size, &This->managed.dirty_box);
+}
diff --git a/src/gallium/state_trackers/nine/buffer9.h b/src/gallium/state_trackers/nine/buffer9.h
index 1afd9a996ea..8bdb4326a4c 100644
--- a/src/gallium/state_trackers/nine/buffer9.h
+++ b/src/gallium/state_trackers/nine/buffer9.h
@@ -25,6 +25,9 @@
#define _NINE_BUFFER9_H_
#include "resource9.h"
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/list.h"
struct pipe_screen;
struct pipe_context;
@@ -39,6 +42,15 @@ struct NineBuffer9
struct pipe_transfer **maps;
int nmaps, maxmaps;
UINT size;
+
+ /* Specific to managed buffers */
+ struct {
+ void *data;
+ boolean dirty;
+ struct pipe_box dirty_box;
+ struct list_head list; /* for update_buffers */
+ struct list_head list2; /* for managed_buffers */
+ } managed;
};
static inline struct NineBuffer9 *
NineBuffer9( void *data )
@@ -60,14 +72,30 @@ NineBuffer9_dtor( struct NineBuffer9 *This );
struct pipe_resource *
NineBuffer9_GetResource( struct NineBuffer9 *This );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineBuffer9_Lock( struct NineBuffer9 *This,
UINT OffsetToLock,
UINT SizeToLock,
void **ppbData,
DWORD Flags );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineBuffer9_Unlock( struct NineBuffer9 *This );
+static inline void
+NineBuffer9_Upload( struct NineBuffer9 *This ) /* write the dirty range of managed.data to the resource */
+{
+ struct pipe_context *pipe = This->pipe;
+ /* Only valid for a managed buffer that currently has its dirty flag set. */
+ assert(This->base.pool == D3DPOOL_MANAGED && This->managed.dirty);
+ pipe->transfer_inline_write(pipe, This->base.resource, 0, 0,
+ &This->managed.dirty_box,
+ (char *)This->managed.data + This->managed.dirty_box.x, /* source starts at the dirty offset */
+ This->size, This->size);
+ This->managed.dirty = FALSE; /* NOTE(review): managed.list is not unlinked here - presumably the caller does it; confirm */
+}
+
+void
+NineBuffer9_SetDirty( struct NineBuffer9 *This );
+
#endif /* _NINE_BUFFER9_H_ */
diff --git a/src/gallium/state_trackers/nine/cryptosession9.c b/src/gallium/state_trackers/nine/cryptosession9.c
index 2622f2b32e4..c656f72a215 100644
--- a/src/gallium/state_trackers/nine/cryptosession9.c
+++ b/src/gallium/state_trackers/nine/cryptosession9.c
@@ -24,14 +24,14 @@
#define DBG_CHANNEL DBG_CRYPTOSESSION
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineCryptoSession9_GetCertificateSize( struct NineCryptoSession9 *This,
UINT *pCertificateSize )
{
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineCryptoSession9_GetCertificate( struct NineCryptoSession9 *This,
UINT CertifacteSize,
BYTE *ppCertificate )
@@ -39,7 +39,7 @@ NineCryptoSession9_GetCertificate( struct NineCryptoSession9 *This,
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineCryptoSession9_NegotiateKeyExchange( struct NineCryptoSession9 *This,
UINT DataSize,
void *pData )
@@ -47,7 +47,7 @@ NineCryptoSession9_NegotiateKeyExchange( struct NineCryptoSession9 *This,
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineCryptoSession9_EncryptionBlt( struct NineCryptoSession9 *This,
IDirect3DSurface9 *pSrcSurface,
IDirect3DSurface9 *pDstSurface,
@@ -57,7 +57,7 @@ NineCryptoSession9_EncryptionBlt( struct NineCryptoSession9 *This,
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineCryptoSession9_DecryptionBlt( struct NineCryptoSession9 *This,
IDirect3DSurface9 *pSrcSurface,
IDirect3DSurface9 *pDstSurface,
@@ -69,7 +69,7 @@ NineCryptoSession9_DecryptionBlt( struct NineCryptoSession9 *This,
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineCryptoSession9_GetSurfacePitch( struct NineCryptoSession9 *This,
IDirect3DSurface9 *pSrcSurface,
UINT *pSurfacePitch )
@@ -77,7 +77,7 @@ NineCryptoSession9_GetSurfacePitch( struct NineCryptoSession9 *This,
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineCryptoSession9_StartSessionKeyRefresh( struct NineCryptoSession9 *This,
void *pRandomNumber,
UINT RandomNumberSize )
@@ -85,13 +85,13 @@ NineCryptoSession9_StartSessionKeyRefresh( struct NineCryptoSession9 *This,
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineCryptoSession9_FinishSessionKeyRefresh( struct NineCryptoSession9 *This )
{
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineCryptoSession9_GetEncryptionBltKey( struct NineCryptoSession9 *This,
void *pReadbackKey,
UINT KeySize )
diff --git a/src/gallium/state_trackers/nine/cryptosession9.h b/src/gallium/state_trackers/nine/cryptosession9.h
index d1eab72eb37..9904455a000 100644
--- a/src/gallium/state_trackers/nine/cryptosession9.h
+++ b/src/gallium/state_trackers/nine/cryptosession9.h
@@ -35,28 +35,28 @@ NineCryptoSession9( void *data )
return (struct NineCryptoSession9 *)data;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineCryptoSession9_GetCertificateSize( struct NineCryptoSession9 *This,
UINT *pCertificateSize );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineCryptoSession9_GetCertificate( struct NineCryptoSession9 *This,
UINT CertifacteSize,
BYTE *ppCertificate );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineCryptoSession9_NegotiateKeyExchange( struct NineCryptoSession9 *This,
UINT DataSize,
void *pData );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineCryptoSession9_EncryptionBlt( struct NineCryptoSession9 *This,
IDirect3DSurface9 *pSrcSurface,
IDirect3DSurface9 *pDstSurface,
UINT DstSurfaceSize,
void *pIV );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineCryptoSession9_DecryptionBlt( struct NineCryptoSession9 *This,
IDirect3DSurface9 *pSrcSurface,
IDirect3DSurface9 *pDstSurface,
@@ -65,20 +65,20 @@ NineCryptoSession9_DecryptionBlt( struct NineCryptoSession9 *This,
void *pContentKey,
void *pIV );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineCryptoSession9_GetSurfacePitch( struct NineCryptoSession9 *This,
IDirect3DSurface9 *pSrcSurface,
UINT *pSurfacePitch );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineCryptoSession9_StartSessionKeyRefresh( struct NineCryptoSession9 *This,
void *pRandomNumber,
UINT RandomNumberSize );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineCryptoSession9_FinishSessionKeyRefresh( struct NineCryptoSession9 *This );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineCryptoSession9_GetEncryptionBltKey( struct NineCryptoSession9 *This,
void *pReadbackKey,
UINT KeySize );
diff --git a/src/gallium/state_trackers/nine/cubetexture9.c b/src/gallium/state_trackers/nine/cubetexture9.c
index 460cc853942..11000942d3a 100644
--- a/src/gallium/state_trackers/nine/cubetexture9.c
+++ b/src/gallium/state_trackers/nine/cubetexture9.c
@@ -53,21 +53,22 @@ NineCubeTexture9_ctor( struct NineCubeTexture9 *This,
This, pParams, EdgeLength, Levels, Usage,
Format, Pool, pSharedHandle);
- user_assert(!(Usage & D3DUSAGE_AUTOGENMIPMAP) ||
- (Pool != D3DPOOL_SYSTEMMEM && Levels <= 1), D3DERR_INVALIDCALL);
+ user_assert(EdgeLength, D3DERR_INVALIDCALL);
+ /* user_assert(!pSharedHandle || Pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL); */
user_assert(!pSharedHandle, D3DERR_INVALIDCALL); /* TODO */
+ user_assert(!(Usage & D3DUSAGE_AUTOGENMIPMAP) ||
+ (Pool != D3DPOOL_SYSTEMMEM && Levels <= 1), D3DERR_INVALIDCALL);
+
if (Usage & D3DUSAGE_AUTOGENMIPMAP)
Levels = 0;
pf = d3d9_to_pipe_format_checked(screen, Format, PIPE_TEXTURE_CUBE, 0,
- PIPE_BIND_SAMPLER_VIEW, FALSE);
- if (pf == PIPE_FORMAT_NONE)
- return D3DERR_INVALIDCALL;
+ PIPE_BIND_SAMPLER_VIEW, FALSE,
+ Pool == D3DPOOL_SCRATCH);
- /* We support ATI1 and ATI2 hacks only for 2D textures */
- if (Format == D3DFMT_ATI1 || Format == D3DFMT_ATI2)
+ if (pf == PIPE_FORMAT_NONE)
return D3DERR_INVALIDCALL;
if (compressed_format(Format)) {
@@ -186,7 +187,7 @@ NineCubeTexture9_dtor( struct NineCubeTexture9 *This )
NineBaseTexture9_dtor(&This->base);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineCubeTexture9_GetLevelDesc( struct NineCubeTexture9 *This,
UINT Level,
D3DSURFACE_DESC *pDesc )
@@ -202,7 +203,7 @@ NineCubeTexture9_GetLevelDesc( struct NineCubeTexture9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineCubeTexture9_GetCubeMapSurface( struct NineCubeTexture9 *This,
D3DCUBEMAP_FACES FaceType,
UINT Level,
@@ -224,7 +225,7 @@ NineCubeTexture9_GetCubeMapSurface( struct NineCubeTexture9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineCubeTexture9_LockRect( struct NineCubeTexture9 *This,
D3DCUBEMAP_FACES FaceType,
UINT Level,
@@ -245,7 +246,7 @@ NineCubeTexture9_LockRect( struct NineCubeTexture9 *This,
return NineSurface9_LockRect(This->surfaces[s], pLockedRect, pRect, Flags);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineCubeTexture9_UnlockRect( struct NineCubeTexture9 *This,
D3DCUBEMAP_FACES FaceType,
UINT Level )
@@ -260,7 +261,7 @@ NineCubeTexture9_UnlockRect( struct NineCubeTexture9 *This,
return NineSurface9_UnlockRect(This->surfaces[s]);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineCubeTexture9_AddDirtyRect( struct NineCubeTexture9 *This,
D3DCUBEMAP_FACES FaceType,
const RECT *pDirtyRect )
diff --git a/src/gallium/state_trackers/nine/cubetexture9.h b/src/gallium/state_trackers/nine/cubetexture9.h
index 999715c0a74..129789d0d69 100644
--- a/src/gallium/state_trackers/nine/cubetexture9.h
+++ b/src/gallium/state_trackers/nine/cubetexture9.h
@@ -48,18 +48,18 @@ NineCubeTexture9_new( struct NineDevice9 *pDevice,
struct NineCubeTexture9 **ppOut,
HANDLE *pSharedHandle );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineCubeTexture9_GetLevelDesc( struct NineCubeTexture9 *This,
UINT Level,
D3DSURFACE_DESC *pDesc );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineCubeTexture9_GetCubeMapSurface( struct NineCubeTexture9 *This,
D3DCUBEMAP_FACES FaceType,
UINT Level,
IDirect3DSurface9 **ppCubeMapSurface );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineCubeTexture9_LockRect( struct NineCubeTexture9 *This,
D3DCUBEMAP_FACES FaceType,
UINT Level,
@@ -67,12 +67,12 @@ NineCubeTexture9_LockRect( struct NineCubeTexture9 *This,
const RECT *pRect,
DWORD Flags );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineCubeTexture9_UnlockRect( struct NineCubeTexture9 *This,
D3DCUBEMAP_FACES FaceType,
UINT Level );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineCubeTexture9_AddDirtyRect( struct NineCubeTexture9 *This,
D3DCUBEMAP_FACES FaceType,
const RECT *pDirtyRect );
diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c
index 475ef96788e..e4403f0db03 100644
--- a/src/gallium/state_trackers/nine/device9.c
+++ b/src/gallium/state_trackers/nine/device9.c
@@ -147,7 +147,9 @@ NineDevice9_ctor( struct NineDevice9 *This,
if (FAILED(hr)) { return hr; }
+ list_inithead(&This->update_buffers);
list_inithead(&This->update_textures);
+ list_inithead(&This->managed_buffers);
list_inithead(&This->managed_textures);
This->screen = pScreen;
@@ -540,7 +542,7 @@ NineDevice9_ResumeRecording( struct NineDevice9 *This )
}
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_TestCooperativeLevel( struct NineDevice9 *This )
{
if (NineSwapChain9_GetOccluded(This->swapchains[0])) {
@@ -553,26 +555,35 @@ NineDevice9_TestCooperativeLevel( struct NineDevice9 *This )
return D3D_OK;
}
-UINT WINAPI
+UINT NINE_WINAPI
NineDevice9_GetAvailableTextureMem( struct NineDevice9 *This )
{
return This->available_texture_mem;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_EvictManagedResources( struct NineDevice9 *This )
{
struct NineBaseTexture9 *tex;
+ struct NineBuffer9 *buf;
DBG("This=%p\n", This);
LIST_FOR_EACH_ENTRY(tex, &This->managed_textures, list2) {
NineBaseTexture9_UnLoad(tex);
}
+ /* Vertex/index buffers are small and are not counted in the d3d memory
+ * usage, so instead of actually freeing them we just mark each managed
+ * buffer dirty, which triggers a re-upload later. We could ignore them
+ * entirely, but some badly behaved apps may rely on eviction (typically
+ * those that write outside the locked regions). */
+ LIST_FOR_EACH_ENTRY(buf, &This->managed_buffers, managed.list2) {
+ NineBuffer9_SetDirty(buf);
+ }
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetDirect3D( struct NineDevice9 *This,
IDirect3D9 **ppD3D9 )
{
@@ -582,7 +593,7 @@ NineDevice9_GetDirect3D( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetDeviceCaps( struct NineDevice9 *This,
D3DCAPS9 *pCaps )
{
@@ -591,7 +602,7 @@ NineDevice9_GetDeviceCaps( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetDisplayMode( struct NineDevice9 *This,
UINT iSwapChain,
D3DDISPLAYMODE *pMode )
@@ -603,7 +614,7 @@ NineDevice9_GetDisplayMode( struct NineDevice9 *This,
return NineSwapChain9_GetDisplayMode(This->swapchains[iSwapChain], pMode);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetCreationParameters( struct NineDevice9 *This,
D3DDEVICE_CREATION_PARAMETERS *pParameters )
{
@@ -612,7 +623,7 @@ NineDevice9_GetCreationParameters( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetCursorProperties( struct NineDevice9 *This,
UINT XHotSpot,
UINT YHotSpot,
@@ -688,7 +699,7 @@ NineDevice9_SetCursorProperties( struct NineDevice9 *This,
return D3D_OK;
}
-void WINAPI
+void NINE_WINAPI
NineDevice9_SetCursorPosition( struct NineDevice9 *This,
int X,
int Y,
@@ -705,7 +716,7 @@ NineDevice9_SetCursorPosition( struct NineDevice9 *This,
This->cursor.software = ID3DPresent_SetCursorPos(swap->present, &This->cursor.pos) != D3D_OK;
}
-BOOL WINAPI
+BOOL NINE_WINAPI
NineDevice9_ShowCursor( struct NineDevice9 *This,
BOOL bShow )
{
@@ -720,7 +731,7 @@ NineDevice9_ShowCursor( struct NineDevice9 *This,
return old;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_CreateAdditionalSwapChain( struct NineDevice9 *This,
D3DPRESENT_PARAMETERS *pPresentationParameters,
IDirect3DSwapChain9 **pSwapChain )
@@ -755,7 +766,7 @@ NineDevice9_CreateAdditionalSwapChain( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetSwapChain( struct NineDevice9 *This,
UINT iSwapChain,
IDirect3DSwapChain9 **pSwapChain )
@@ -771,13 +782,13 @@ NineDevice9_GetSwapChain( struct NineDevice9 *This,
return D3D_OK;
}
-UINT WINAPI
+UINT NINE_WINAPI
NineDevice9_GetNumberOfSwapChains( struct NineDevice9 *This )
{
return This->nswapchains;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_Reset( struct NineDevice9 *This,
D3DPRESENT_PARAMETERS *pPresentationParameters )
{
@@ -810,7 +821,7 @@ NineDevice9_Reset( struct NineDevice9 *This,
return hr;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_Present( struct NineDevice9 *This,
const RECT *pSourceRect,
const RECT *pDestRect,
@@ -833,7 +844,7 @@ NineDevice9_Present( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetBackBuffer( struct NineDevice9 *This,
UINT iSwapChain,
UINT iBackBuffer,
@@ -849,7 +860,7 @@ NineDevice9_GetBackBuffer( struct NineDevice9 *This,
iBackBuffer, Type, ppBackBuffer);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetRasterStatus( struct NineDevice9 *This,
UINT iSwapChain,
D3DRASTER_STATUS *pRasterStatus )
@@ -861,14 +872,14 @@ NineDevice9_GetRasterStatus( struct NineDevice9 *This,
pRasterStatus);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetDialogBoxMode( struct NineDevice9 *This,
BOOL bEnableDialogs )
{
STUB(D3DERR_INVALIDCALL);
}
-void WINAPI
+void NINE_WINAPI
NineDevice9_SetGammaRamp( struct NineDevice9 *This,
UINT iSwapChain,
DWORD Flags,
@@ -887,7 +898,7 @@ NineDevice9_SetGammaRamp( struct NineDevice9 *This,
}
}
-void WINAPI
+void NINE_WINAPI
NineDevice9_GetGammaRamp( struct NineDevice9 *This,
UINT iSwapChain,
D3DGAMMARAMP *pRamp )
@@ -901,7 +912,7 @@ NineDevice9_GetGammaRamp( struct NineDevice9 *This,
*pRamp = This->swapchains[iSwapChain]->gamma;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_CreateTexture( struct NineDevice9 *This,
UINT Width,
UINT Height,
@@ -925,15 +936,6 @@ NineDevice9_CreateTexture( struct NineDevice9 *This,
D3DUSAGE_SOFTWAREPROCESSING | D3DUSAGE_TEXTAPI;
*ppTexture = NULL;
- user_assert(Width && Height, D3DERR_INVALIDCALL);
- user_assert(!pSharedHandle || This->ex, D3DERR_INVALIDCALL);
- /* When is used shared handle, Pool must be
- * SYSTEMMEM with Levels 1 or DEFAULT with any Levels */
- user_assert(!pSharedHandle || Pool != D3DPOOL_SYSTEMMEM || Levels == 1,
- D3DERR_INVALIDCALL);
- user_assert(!pSharedHandle || Pool == D3DPOOL_SYSTEMMEM || Pool == D3DPOOL_DEFAULT,
- D3DERR_INVALIDCALL);
- user_assert((Usage != D3DUSAGE_AUTOGENMIPMAP || Levels <= 1), D3DERR_INVALIDCALL);
hr = NineTexture9_new(This, Width, Height, Levels, Usage, Format, Pool,
&tex, pSharedHandle);
@@ -943,7 +945,7 @@ NineDevice9_CreateTexture( struct NineDevice9 *This,
return hr;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_CreateVolumeTexture( struct NineDevice9 *This,
UINT Width,
UINT Height,
@@ -967,8 +969,6 @@ NineDevice9_CreateVolumeTexture( struct NineDevice9 *This,
D3DUSAGE_SOFTWAREPROCESSING;
*ppVolumeTexture = NULL;
- user_assert(Width && Height && Depth, D3DERR_INVALIDCALL);
- user_assert(!pSharedHandle || Pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL);
hr = NineVolumeTexture9_new(This, Width, Height, Depth, Levels,
Usage, Format, Pool, &tex, pSharedHandle);
@@ -978,7 +978,7 @@ NineDevice9_CreateVolumeTexture( struct NineDevice9 *This,
return hr;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_CreateCubeTexture( struct NineDevice9 *This,
UINT EdgeLength,
UINT Levels,
@@ -1001,8 +1001,6 @@ NineDevice9_CreateCubeTexture( struct NineDevice9 *This,
D3DUSAGE_SOFTWAREPROCESSING;
*ppCubeTexture = NULL;
- user_assert(EdgeLength, D3DERR_INVALIDCALL);
- user_assert(!pSharedHandle || Pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL);
hr = NineCubeTexture9_new(This, EdgeLength, Levels, Usage, Format, Pool,
&tex, pSharedHandle);
@@ -1012,7 +1010,7 @@ NineDevice9_CreateCubeTexture( struct NineDevice9 *This,
return hr;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_CreateVertexBuffer( struct NineDevice9 *This,
UINT Length,
DWORD Usage,
@@ -1050,7 +1048,7 @@ NineDevice9_CreateVertexBuffer( struct NineDevice9 *This,
return hr;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_CreateIndexBuffer( struct NineDevice9 *This,
UINT Length,
DWORD Usage,
@@ -1137,7 +1135,10 @@ create_zs_or_rt_surface(struct NineDevice9 *This,
}
templ.format = d3d9_to_pipe_format_checked(screen, Format, templ.target,
templ.nr_samples, templ.bind,
- FALSE);
+ FALSE, Pool == D3DPOOL_SCRATCH);
+
+ if (templ.format == PIPE_FORMAT_NONE && Format != D3DFMT_NULL)
+ return D3DERR_INVALIDCALL;
desc.Format = Format;
desc.Type = D3DRTYPE_SURFACE;
@@ -1178,7 +1179,7 @@ create_zs_or_rt_surface(struct NineDevice9 *This,
return hr;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_CreateRenderTarget( struct NineDevice9 *This,
UINT Width,
UINT Height,
@@ -1196,7 +1197,7 @@ NineDevice9_CreateRenderTarget( struct NineDevice9 *This,
Lockable, ppSurface, pSharedHandle);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_CreateDepthStencilSurface( struct NineDevice9 *This,
UINT Width,
UINT Height,
@@ -1216,7 +1217,7 @@ NineDevice9_CreateDepthStencilSurface( struct NineDevice9 *This,
Discard, ppSurface, pSharedHandle);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_UpdateSurface( struct NineDevice9 *This,
IDirect3DSurface9 *pSourceSurface,
const RECT *pSourceRect,
@@ -1309,7 +1310,7 @@ NineDevice9_UpdateSurface( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_UpdateTexture( struct NineDevice9 *This,
IDirect3DBaseTexture9 *pSourceTexture,
IDirect3DBaseTexture9 *pDestinationTexture )
@@ -1438,7 +1439,7 @@ NineDevice9_UpdateTexture( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetRenderTargetData( struct NineDevice9 *This,
IDirect3DSurface9 *pRenderTarget,
IDirect3DSurface9 *pDestSurface )
@@ -1463,7 +1464,7 @@ NineDevice9_GetRenderTargetData( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetFrontBufferData( struct NineDevice9 *This,
UINT iSwapChain,
IDirect3DSurface9 *pDestSurface )
@@ -1478,7 +1479,7 @@ NineDevice9_GetFrontBufferData( struct NineDevice9 *This,
pDestSurface);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_StretchRect( struct NineDevice9 *This,
IDirect3DSurface9 *pSourceSurface,
const RECT *pSourceRect,
@@ -1682,7 +1683,7 @@ NineDevice9_StretchRect( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_ColorFill( struct NineDevice9 *This,
IDirect3DSurface9 *pSurface,
const RECT *pRect,
@@ -1749,7 +1750,7 @@ NineDevice9_ColorFill( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_CreateOffscreenPlainSurface( struct NineDevice9 *This,
UINT Width,
UINT Height,
@@ -1782,7 +1783,7 @@ NineDevice9_CreateOffscreenPlainSurface( struct NineDevice9 *This,
return hr;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetRenderTarget( struct NineDevice9 *This,
DWORD RenderTargetIndex,
IDirect3DSurface9 *pRenderTarget )
@@ -1821,7 +1822,7 @@ NineDevice9_SetRenderTarget( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetRenderTarget( struct NineDevice9 *This,
DWORD RenderTargetIndex,
IDirect3DSurface9 **ppRenderTarget )
@@ -1839,7 +1840,7 @@ NineDevice9_GetRenderTarget( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetDepthStencilSurface( struct NineDevice9 *This,
IDirect3DSurface9 *pNewZStencil )
{
@@ -1852,7 +1853,7 @@ NineDevice9_SetDepthStencilSurface( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetDepthStencilSurface( struct NineDevice9 *This,
IDirect3DSurface9 **ppZStencilSurface )
{
@@ -1866,7 +1867,7 @@ NineDevice9_GetDepthStencilSurface( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_BeginScene( struct NineDevice9 *This )
{
DBG("This=%p\n", This);
@@ -1876,7 +1877,7 @@ NineDevice9_BeginScene( struct NineDevice9 *This )
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_EndScene( struct NineDevice9 *This )
{
DBG("This=%p\n", This);
@@ -1885,7 +1886,7 @@ NineDevice9_EndScene( struct NineDevice9 *This )
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_Clear( struct NineDevice9 *This,
DWORD Count,
const D3DRECT *pRects,
@@ -2047,7 +2048,7 @@ NineDevice9_Clear( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetTransform( struct NineDevice9 *This,
D3DTRANSFORMSTATETYPE State,
const D3DMATRIX *pMatrix )
@@ -2066,7 +2067,7 @@ NineDevice9_SetTransform( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetTransform( struct NineDevice9 *This,
D3DTRANSFORMSTATETYPE State,
D3DMATRIX *pMatrix )
@@ -2077,7 +2078,7 @@ NineDevice9_GetTransform( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_MultiplyTransform( struct NineDevice9 *This,
D3DTRANSFORMSTATETYPE State,
const D3DMATRIX *pMatrix )
@@ -2094,7 +2095,7 @@ NineDevice9_MultiplyTransform( struct NineDevice9 *This,
return NineDevice9_SetTransform(This, State, &T);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetViewport( struct NineDevice9 *This,
const D3DVIEWPORT9 *pViewport )
{
@@ -2110,7 +2111,7 @@ NineDevice9_SetViewport( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetViewport( struct NineDevice9 *This,
D3DVIEWPORT9 *pViewport )
{
@@ -2118,7 +2119,7 @@ NineDevice9_GetViewport( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetMaterial( struct NineDevice9 *This,
const D3DMATERIAL9 *pMaterial )
{
@@ -2136,7 +2137,7 @@ NineDevice9_SetMaterial( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetMaterial( struct NineDevice9 *This,
D3DMATERIAL9 *pMaterial )
{
@@ -2145,7 +2146,7 @@ NineDevice9_GetMaterial( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetLight( struct NineDevice9 *This,
DWORD Index,
const D3DLIGHT9 *pLight )
@@ -2194,7 +2195,7 @@ NineDevice9_SetLight( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetLight( struct NineDevice9 *This,
DWORD Index,
D3DLIGHT9 *pLight )
@@ -2211,7 +2212,7 @@ NineDevice9_GetLight( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_LightEnable( struct NineDevice9 *This,
DWORD Index,
BOOL Enable )
@@ -2261,7 +2262,7 @@ NineDevice9_LightEnable( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetLightEnable( struct NineDevice9 *This,
DWORD Index,
BOOL *pEnable )
@@ -2282,7 +2283,7 @@ NineDevice9_GetLightEnable( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetClipPlane( struct NineDevice9 *This,
DWORD Index,
const float *pPlane )
@@ -2303,7 +2304,7 @@ NineDevice9_SetClipPlane( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetClipPlane( struct NineDevice9 *This,
DWORD Index,
float *pPlane )
@@ -2374,7 +2375,7 @@ NineDevice9_ResolveZ( struct NineDevice9 *This )
#define ALPHA_TO_COVERAGE_ENABLE MAKEFOURCC('A', '2', 'M', '1')
#define ALPHA_TO_COVERAGE_DISABLE MAKEFOURCC('A', '2', 'M', '0')
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetRenderState( struct NineDevice9 *This,
D3DRENDERSTATETYPE State,
DWORD Value )
@@ -2420,7 +2421,7 @@ NineDevice9_SetRenderState( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetRenderState( struct NineDevice9 *This,
D3DRENDERSTATETYPE State,
DWORD *pValue )
@@ -2431,7 +2432,7 @@ NineDevice9_GetRenderState( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_CreateStateBlock( struct NineDevice9 *This,
D3DSTATEBLOCKTYPE Type,
IDirect3DStateBlock9 **ppSB )
@@ -2531,7 +2532,7 @@ NineDevice9_CreateStateBlock( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_BeginStateBlock( struct NineDevice9 *This )
{
HRESULT hr;
@@ -2551,7 +2552,7 @@ NineDevice9_BeginStateBlock( struct NineDevice9 *This )
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_EndStateBlock( struct NineDevice9 *This,
IDirect3DStateBlock9 **ppSB )
{
@@ -2570,21 +2571,21 @@ NineDevice9_EndStateBlock( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetClipStatus( struct NineDevice9 *This,
const D3DCLIPSTATUS9 *pClipStatus )
{
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetClipStatus( struct NineDevice9 *This,
D3DCLIPSTATUS9 *pClipStatus )
{
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetTexture( struct NineDevice9 *This,
DWORD Stage,
IDirect3DBaseTexture9 **ppTexture )
@@ -2605,7 +2606,7 @@ NineDevice9_GetTexture( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetTexture( struct NineDevice9 *This,
DWORD Stage,
IDirect3DBaseTexture9 *pTexture )
@@ -2650,7 +2651,7 @@ NineDevice9_SetTexture( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetTextureStageState( struct NineDevice9 *This,
DWORD Stage,
D3DTEXTURESTAGESTATETYPE Type,
@@ -2666,7 +2667,7 @@ NineDevice9_GetTextureStageState( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetTextureStageState( struct NineDevice9 *This,
DWORD Stage,
D3DTEXTURESTAGESTATETYPE Type,
@@ -2719,7 +2720,7 @@ NineDevice9_SetTextureStageState( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetSamplerState( struct NineDevice9 *This,
DWORD Sampler,
D3DSAMPLERSTATETYPE Type,
@@ -2737,7 +2738,7 @@ NineDevice9_GetSamplerState( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetSamplerState( struct NineDevice9 *This,
DWORD Sampler,
D3DSAMPLERSTATETYPE Type,
@@ -2765,7 +2766,7 @@ NineDevice9_SetSamplerState( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_ValidateDevice( struct NineDevice9 *This,
DWORD *pNumPasses )
{
@@ -2805,7 +2806,7 @@ NineDevice9_ValidateDevice( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetPaletteEntries( struct NineDevice9 *This,
UINT PaletteNumber,
const PALETTEENTRY *pEntries )
@@ -2813,7 +2814,7 @@ NineDevice9_SetPaletteEntries( struct NineDevice9 *This,
STUB(D3D_OK); /* like wine */
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetPaletteEntries( struct NineDevice9 *This,
UINT PaletteNumber,
PALETTEENTRY *pEntries )
@@ -2821,21 +2822,21 @@ NineDevice9_GetPaletteEntries( struct NineDevice9 *This,
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetCurrentTexturePalette( struct NineDevice9 *This,
UINT PaletteNumber )
{
STUB(D3D_OK); /* like wine */
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetCurrentTexturePalette( struct NineDevice9 *This,
UINT *PaletteNumber )
{
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetScissorRect( struct NineDevice9 *This,
const RECT *pRect )
{
@@ -2854,7 +2855,7 @@ NineDevice9_SetScissorRect( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetScissorRect( struct NineDevice9 *This,
RECT *pRect )
{
@@ -2866,27 +2867,27 @@ NineDevice9_GetScissorRect( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetSoftwareVertexProcessing( struct NineDevice9 *This,
BOOL bSoftware )
{
STUB(D3DERR_INVALIDCALL);
}
-BOOL WINAPI
+BOOL NINE_WINAPI
NineDevice9_GetSoftwareVertexProcessing( struct NineDevice9 *This )
{
return !!(This->params.BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetNPatchMode( struct NineDevice9 *This,
float nSegments )
{
STUB(D3DERR_INVALIDCALL);
}
-float WINAPI
+float NINE_WINAPI
NineDevice9_GetNPatchMode( struct NineDevice9 *This )
{
STUB(0);
@@ -2908,7 +2909,7 @@ init_draw_info(struct pipe_draw_info *info,
info->indirect = NULL;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_DrawPrimitive( struct NineDevice9 *This,
D3DPRIMITIVETYPE PrimitiveType,
UINT StartVertex,
@@ -2933,7 +2934,7 @@ NineDevice9_DrawPrimitive( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_DrawIndexedPrimitive( struct NineDevice9 *This,
D3DPRIMITIVETYPE PrimitiveType,
INT BaseVertexIndex,
@@ -2967,7 +2968,7 @@ NineDevice9_DrawIndexedPrimitive( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_DrawPrimitiveUP( struct NineDevice9 *This,
D3DPRIMITIVETYPE PrimitiveType,
UINT PrimitiveCount,
@@ -3023,7 +3024,7 @@ NineDevice9_DrawPrimitiveUP( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_DrawIndexedPrimitiveUP( struct NineDevice9 *This,
D3DPRIMITIVETYPE PrimitiveType,
UINT MinVertexIndex,
@@ -3115,7 +3116,7 @@ NineDevice9_DrawIndexedPrimitiveUP( struct NineDevice9 *This,
/* TODO: Write to pDestBuffer directly if vertex declaration contains
* only f32 formats.
*/
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_ProcessVertices( struct NineDevice9 *This,
UINT SrcStartIndex,
UINT DestIndex,
@@ -3208,7 +3209,7 @@ out:
return hr;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_CreateVertexDeclaration( struct NineDevice9 *This,
const D3DVERTEXELEMENT9 *pVertexElements,
IDirect3DVertexDeclaration9 **ppDecl )
@@ -3225,7 +3226,7 @@ NineDevice9_CreateVertexDeclaration( struct NineDevice9 *This,
return hr;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetVertexDeclaration( struct NineDevice9 *This,
IDirect3DVertexDeclaration9 *pDecl )
{
@@ -3250,7 +3251,7 @@ NineDevice9_SetVertexDeclaration( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetVertexDeclaration( struct NineDevice9 *This,
IDirect3DVertexDeclaration9 **ppDecl )
{
@@ -3262,7 +3263,7 @@ NineDevice9_GetVertexDeclaration( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetFVF( struct NineDevice9 *This,
DWORD FVF )
{
@@ -3286,7 +3287,7 @@ NineDevice9_SetFVF( struct NineDevice9 *This,
This, (IDirect3DVertexDeclaration9 *)vdecl);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetFVF( struct NineDevice9 *This,
DWORD *pFVF )
{
@@ -3294,7 +3295,7 @@ NineDevice9_GetFVF( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_CreateVertexShader( struct NineDevice9 *This,
const DWORD *pFunction,
IDirect3DVertexShader9 **ppShader )
@@ -3311,7 +3312,7 @@ NineDevice9_CreateVertexShader( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetVertexShader( struct NineDevice9 *This,
IDirect3DVertexShader9 *pShader )
{
@@ -3336,7 +3337,7 @@ NineDevice9_SetVertexShader( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetVertexShader( struct NineDevice9 *This,
IDirect3DVertexShader9 **ppShader )
{
@@ -3345,7 +3346,7 @@ NineDevice9_GetVertexShader( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetVertexShaderConstantF( struct NineDevice9 *This,
UINT StartRegister,
const float *pConstantData,
@@ -3382,7 +3383,7 @@ NineDevice9_SetVertexShaderConstantF( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetVertexShaderConstantF( struct NineDevice9 *This,
UINT StartRegister,
float *pConstantData,
@@ -3401,7 +3402,7 @@ NineDevice9_GetVertexShaderConstantF( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetVertexShaderConstantI( struct NineDevice9 *This,
UINT StartRegister,
const int *pConstantData,
@@ -3441,7 +3442,7 @@ NineDevice9_SetVertexShaderConstantI( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetVertexShaderConstantI( struct NineDevice9 *This,
UINT StartRegister,
int *pConstantData,
@@ -3470,7 +3471,7 @@ NineDevice9_GetVertexShaderConstantI( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetVertexShaderConstantB( struct NineDevice9 *This,
UINT StartRegister,
const BOOL *pConstantData,
@@ -3506,7 +3507,7 @@ NineDevice9_SetVertexShaderConstantB( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetVertexShaderConstantB( struct NineDevice9 *This,
UINT StartRegister,
BOOL *pConstantData,
@@ -3525,7 +3526,7 @@ NineDevice9_GetVertexShaderConstantB( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetStreamSource( struct NineDevice9 *This,
UINT StreamNumber,
IDirect3DVertexBuffer9 *pStreamData,
@@ -3562,7 +3563,7 @@ NineDevice9_SetStreamSource( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetStreamSource( struct NineDevice9 *This,
UINT StreamNumber,
IDirect3DVertexBuffer9 **ppStreamData,
@@ -3582,7 +3583,7 @@ NineDevice9_GetStreamSource( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetStreamSourceFreq( struct NineDevice9 *This,
UINT StreamNumber,
UINT Setting )
@@ -3616,7 +3617,7 @@ NineDevice9_SetStreamSourceFreq( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetStreamSourceFreq( struct NineDevice9 *This,
UINT StreamNumber,
UINT *pSetting )
@@ -3626,7 +3627,7 @@ NineDevice9_GetStreamSourceFreq( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetIndices( struct NineDevice9 *This,
IDirect3DIndexBuffer9 *pIndexData )
{
@@ -3647,7 +3648,7 @@ NineDevice9_SetIndices( struct NineDevice9 *This,
/* XXX: wine/d3d9 doesn't have pBaseVertexIndex, and it doesn't make sense
* here because it's an argument passed to the Draw calls.
*/
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetIndices( struct NineDevice9 *This,
IDirect3DIndexBuffer9 **ppIndexData /*,
UINT *pBaseVertexIndex */ )
@@ -3657,7 +3658,7 @@ NineDevice9_GetIndices( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_CreatePixelShader( struct NineDevice9 *This,
const DWORD *pFunction,
IDirect3DPixelShader9 **ppShader )
@@ -3674,7 +3675,7 @@ NineDevice9_CreatePixelShader( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetPixelShader( struct NineDevice9 *This,
IDirect3DPixelShader9 *pShader )
{
@@ -3704,7 +3705,7 @@ NineDevice9_SetPixelShader( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetPixelShader( struct NineDevice9 *This,
IDirect3DPixelShader9 **ppShader )
{
@@ -3713,7 +3714,7 @@ NineDevice9_GetPixelShader( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetPixelShaderConstantF( struct NineDevice9 *This,
UINT StartRegister,
const float *pConstantData,
@@ -3750,7 +3751,7 @@ NineDevice9_SetPixelShaderConstantF( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetPixelShaderConstantF( struct NineDevice9 *This,
UINT StartRegister,
float *pConstantData,
@@ -3769,7 +3770,7 @@ NineDevice9_GetPixelShaderConstantF( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetPixelShaderConstantI( struct NineDevice9 *This,
UINT StartRegister,
const int *pConstantData,
@@ -3808,7 +3809,7 @@ NineDevice9_SetPixelShaderConstantI( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetPixelShaderConstantI( struct NineDevice9 *This,
UINT StartRegister,
int *pConstantData,
@@ -3837,7 +3838,7 @@ NineDevice9_GetPixelShaderConstantI( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetPixelShaderConstantB( struct NineDevice9 *This,
UINT StartRegister,
const BOOL *pConstantData,
@@ -3873,7 +3874,7 @@ NineDevice9_SetPixelShaderConstantB( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetPixelShaderConstantB( struct NineDevice9 *This,
UINT StartRegister,
BOOL *pConstantData,
@@ -3892,7 +3893,7 @@ NineDevice9_GetPixelShaderConstantB( struct NineDevice9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_DrawRectPatch( struct NineDevice9 *This,
UINT Handle,
const float *pNumSegs,
@@ -3901,7 +3902,7 @@ NineDevice9_DrawRectPatch( struct NineDevice9 *This,
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_DrawTriPatch( struct NineDevice9 *This,
UINT Handle,
const float *pNumSegs,
@@ -3910,14 +3911,14 @@ NineDevice9_DrawTriPatch( struct NineDevice9 *This,
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_DeletePatch( struct NineDevice9 *This,
UINT Handle )
{
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_CreateQuery( struct NineDevice9 *This,
D3DQUERYTYPE Type,
IDirect3DQuery9 **ppQuery )
diff --git a/src/gallium/state_trackers/nine/device9.h b/src/gallium/state_trackers/nine/device9.h
index 34edf0cfa48..73a43cf08ff 100644
--- a/src/gallium/state_trackers/nine/device9.h
+++ b/src/gallium/state_trackers/nine/device9.h
@@ -68,7 +68,9 @@ struct NineDevice9
struct nine_state *update; /* state to update (&state / &record->state) */
struct nine_state state; /* device state */
+ struct list_head update_buffers;
struct list_head update_textures;
+ struct list_head managed_buffers;
struct list_head managed_textures;
boolean is_recording;
@@ -196,100 +198,100 @@ NineDevice9_GetCaps( struct NineDevice9 *This );
/*** Direct3D public ***/
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_TestCooperativeLevel( struct NineDevice9 *This );
-UINT WINAPI
+UINT NINE_WINAPI
NineDevice9_GetAvailableTextureMem( struct NineDevice9 *This );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_EvictManagedResources( struct NineDevice9 *This );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetDirect3D( struct NineDevice9 *This,
IDirect3D9 **ppD3D9 );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetDeviceCaps( struct NineDevice9 *This,
D3DCAPS9 *pCaps );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetDisplayMode( struct NineDevice9 *This,
UINT iSwapChain,
D3DDISPLAYMODE *pMode );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetCreationParameters( struct NineDevice9 *This,
D3DDEVICE_CREATION_PARAMETERS *pParameters );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetCursorProperties( struct NineDevice9 *This,
UINT XHotSpot,
UINT YHotSpot,
IDirect3DSurface9 *pCursorBitmap );
-void WINAPI
+void NINE_WINAPI
NineDevice9_SetCursorPosition( struct NineDevice9 *This,
int X,
int Y,
DWORD Flags );
-BOOL WINAPI
+BOOL NINE_WINAPI
NineDevice9_ShowCursor( struct NineDevice9 *This,
BOOL bShow );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_CreateAdditionalSwapChain( struct NineDevice9 *This,
D3DPRESENT_PARAMETERS *pPresentationParameters,
IDirect3DSwapChain9 **pSwapChain );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetSwapChain( struct NineDevice9 *This,
UINT iSwapChain,
IDirect3DSwapChain9 **pSwapChain );
-UINT WINAPI
+UINT NINE_WINAPI
NineDevice9_GetNumberOfSwapChains( struct NineDevice9 *This );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_Reset( struct NineDevice9 *This,
D3DPRESENT_PARAMETERS *pPresentationParameters );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_Present( struct NineDevice9 *This,
const RECT *pSourceRect,
const RECT *pDestRect,
HWND hDestWindowOverride,
const RGNDATA *pDirtyRegion );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetBackBuffer( struct NineDevice9 *This,
UINT iSwapChain,
UINT iBackBuffer,
D3DBACKBUFFER_TYPE Type,
IDirect3DSurface9 **ppBackBuffer );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetRasterStatus( struct NineDevice9 *This,
UINT iSwapChain,
D3DRASTER_STATUS *pRasterStatus );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetDialogBoxMode( struct NineDevice9 *This,
BOOL bEnableDialogs );
-void WINAPI
+void NINE_WINAPI
NineDevice9_SetGammaRamp( struct NineDevice9 *This,
UINT iSwapChain,
DWORD Flags,
const D3DGAMMARAMP *pRamp );
-void WINAPI
+void NINE_WINAPI
NineDevice9_GetGammaRamp( struct NineDevice9 *This,
UINT iSwapChain,
D3DGAMMARAMP *pRamp );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_CreateTexture( struct NineDevice9 *This,
UINT Width,
UINT Height,
@@ -300,7 +302,7 @@ NineDevice9_CreateTexture( struct NineDevice9 *This,
IDirect3DTexture9 **ppTexture,
HANDLE *pSharedHandle );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_CreateVolumeTexture( struct NineDevice9 *This,
UINT Width,
UINT Height,
@@ -312,7 +314,7 @@ NineDevice9_CreateVolumeTexture( struct NineDevice9 *This,
IDirect3DVolumeTexture9 **ppVolumeTexture,
HANDLE *pSharedHandle );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_CreateCubeTexture( struct NineDevice9 *This,
UINT EdgeLength,
UINT Levels,
@@ -322,7 +324,7 @@ NineDevice9_CreateCubeTexture( struct NineDevice9 *This,
IDirect3DCubeTexture9 **ppCubeTexture,
HANDLE *pSharedHandle );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_CreateVertexBuffer( struct NineDevice9 *This,
UINT Length,
DWORD Usage,
@@ -331,7 +333,7 @@ NineDevice9_CreateVertexBuffer( struct NineDevice9 *This,
IDirect3DVertexBuffer9 **ppVertexBuffer,
HANDLE *pSharedHandle );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_CreateIndexBuffer( struct NineDevice9 *This,
UINT Length,
DWORD Usage,
@@ -340,7 +342,7 @@ NineDevice9_CreateIndexBuffer( struct NineDevice9 *This,
IDirect3DIndexBuffer9 **ppIndexBuffer,
HANDLE *pSharedHandle );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_CreateRenderTarget( struct NineDevice9 *This,
UINT Width,
UINT Height,
@@ -351,7 +353,7 @@ NineDevice9_CreateRenderTarget( struct NineDevice9 *This,
IDirect3DSurface9 **ppSurface,
HANDLE *pSharedHandle );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_CreateDepthStencilSurface( struct NineDevice9 *This,
UINT Width,
UINT Height,
@@ -362,29 +364,29 @@ NineDevice9_CreateDepthStencilSurface( struct NineDevice9 *This,
IDirect3DSurface9 **ppSurface,
HANDLE *pSharedHandle );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_UpdateSurface( struct NineDevice9 *This,
IDirect3DSurface9 *pSourceSurface,
const RECT *pSourceRect,
IDirect3DSurface9 *pDestinationSurface,
const POINT *pDestPoint );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_UpdateTexture( struct NineDevice9 *This,
IDirect3DBaseTexture9 *pSourceTexture,
IDirect3DBaseTexture9 *pDestinationTexture );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetRenderTargetData( struct NineDevice9 *This,
IDirect3DSurface9 *pRenderTarget,
IDirect3DSurface9 *pDestSurface );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetFrontBufferData( struct NineDevice9 *This,
UINT iSwapChain,
IDirect3DSurface9 *pDestSurface );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_StretchRect( struct NineDevice9 *This,
IDirect3DSurface9 *pSourceSurface,
const RECT *pSourceRect,
@@ -392,13 +394,13 @@ NineDevice9_StretchRect( struct NineDevice9 *This,
const RECT *pDestRect,
D3DTEXTUREFILTERTYPE Filter );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_ColorFill( struct NineDevice9 *This,
IDirect3DSurface9 *pSurface,
const RECT *pRect,
D3DCOLOR color );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_CreateOffscreenPlainSurface( struct NineDevice9 *This,
UINT Width,
UINT Height,
@@ -407,31 +409,31 @@ NineDevice9_CreateOffscreenPlainSurface( struct NineDevice9 *This,
IDirect3DSurface9 **ppSurface,
HANDLE *pSharedHandle );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetRenderTarget( struct NineDevice9 *This,
DWORD RenderTargetIndex,
IDirect3DSurface9 *pRenderTarget );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetRenderTarget( struct NineDevice9 *This,
DWORD RenderTargetIndex,
IDirect3DSurface9 **ppRenderTarget );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetDepthStencilSurface( struct NineDevice9 *This,
IDirect3DSurface9 *pNewZStencil );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetDepthStencilSurface( struct NineDevice9 *This,
IDirect3DSurface9 **ppZStencilSurface );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_BeginScene( struct NineDevice9 *This );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_EndScene( struct NineDevice9 *This );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_Clear( struct NineDevice9 *This,
DWORD Count,
const D3DRECT *pRects,
@@ -440,182 +442,182 @@ NineDevice9_Clear( struct NineDevice9 *This,
float Z,
DWORD Stencil );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetTransform( struct NineDevice9 *This,
D3DTRANSFORMSTATETYPE State,
const D3DMATRIX *pMatrix );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetTransform( struct NineDevice9 *This,
D3DTRANSFORMSTATETYPE State,
D3DMATRIX *pMatrix );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_MultiplyTransform( struct NineDevice9 *This,
D3DTRANSFORMSTATETYPE State,
const D3DMATRIX *pMatrix );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetViewport( struct NineDevice9 *This,
const D3DVIEWPORT9 *pViewport );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetViewport( struct NineDevice9 *This,
D3DVIEWPORT9 *pViewport );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetMaterial( struct NineDevice9 *This,
const D3DMATERIAL9 *pMaterial );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetMaterial( struct NineDevice9 *This,
D3DMATERIAL9 *pMaterial );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetLight( struct NineDevice9 *This,
DWORD Index,
const D3DLIGHT9 *pLight );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetLight( struct NineDevice9 *This,
DWORD Index,
D3DLIGHT9 *pLight );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_LightEnable( struct NineDevice9 *This,
DWORD Index,
BOOL Enable );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetLightEnable( struct NineDevice9 *This,
DWORD Index,
BOOL *pEnable );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetClipPlane( struct NineDevice9 *This,
DWORD Index,
const float *pPlane );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetClipPlane( struct NineDevice9 *This,
DWORD Index,
float *pPlane );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetRenderState( struct NineDevice9 *This,
D3DRENDERSTATETYPE State,
DWORD Value );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetRenderState( struct NineDevice9 *This,
D3DRENDERSTATETYPE State,
DWORD *pValue );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_CreateStateBlock( struct NineDevice9 *This,
D3DSTATEBLOCKTYPE Type,
IDirect3DStateBlock9 **ppSB );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_BeginStateBlock( struct NineDevice9 *This );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_EndStateBlock( struct NineDevice9 *This,
IDirect3DStateBlock9 **ppSB );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetClipStatus( struct NineDevice9 *This,
const D3DCLIPSTATUS9 *pClipStatus );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetClipStatus( struct NineDevice9 *This,
D3DCLIPSTATUS9 *pClipStatus );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetTexture( struct NineDevice9 *This,
DWORD Stage,
IDirect3DBaseTexture9 **ppTexture );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetTexture( struct NineDevice9 *This,
DWORD Stage,
IDirect3DBaseTexture9 *pTexture );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetTextureStageState( struct NineDevice9 *This,
DWORD Stage,
D3DTEXTURESTAGESTATETYPE Type,
DWORD *pValue );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetTextureStageState( struct NineDevice9 *This,
DWORD Stage,
D3DTEXTURESTAGESTATETYPE Type,
DWORD Value );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetSamplerState( struct NineDevice9 *This,
DWORD Sampler,
D3DSAMPLERSTATETYPE Type,
DWORD *pValue );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetSamplerState( struct NineDevice9 *This,
DWORD Sampler,
D3DSAMPLERSTATETYPE Type,
DWORD Value );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_ValidateDevice( struct NineDevice9 *This,
DWORD *pNumPasses );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetPaletteEntries( struct NineDevice9 *This,
UINT PaletteNumber,
const PALETTEENTRY *pEntries );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetPaletteEntries( struct NineDevice9 *This,
UINT PaletteNumber,
PALETTEENTRY *pEntries );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetCurrentTexturePalette( struct NineDevice9 *This,
UINT PaletteNumber );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetCurrentTexturePalette( struct NineDevice9 *This,
UINT *PaletteNumber );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetScissorRect( struct NineDevice9 *This,
const RECT *pRect );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetScissorRect( struct NineDevice9 *This,
RECT *pRect );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetSoftwareVertexProcessing( struct NineDevice9 *This,
BOOL bSoftware );
-BOOL WINAPI
+BOOL NINE_WINAPI
NineDevice9_GetSoftwareVertexProcessing( struct NineDevice9 *This );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetNPatchMode( struct NineDevice9 *This,
float nSegments );
-float WINAPI
+float NINE_WINAPI
NineDevice9_GetNPatchMode( struct NineDevice9 *This );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_DrawPrimitive( struct NineDevice9 *This,
D3DPRIMITIVETYPE PrimitiveType,
UINT StartVertex,
UINT PrimitiveCount );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_DrawIndexedPrimitive( struct NineDevice9 *This,
D3DPRIMITIVETYPE PrimitiveType,
INT BaseVertexIndex,
@@ -624,14 +626,14 @@ NineDevice9_DrawIndexedPrimitive( struct NineDevice9 *This,
UINT startIndex,
UINT primCount );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_DrawPrimitiveUP( struct NineDevice9 *This,
D3DPRIMITIVETYPE PrimitiveType,
UINT PrimitiveCount,
const void *pVertexStreamZeroData,
UINT VertexStreamZeroStride );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_DrawIndexedPrimitiveUP( struct NineDevice9 *This,
D3DPRIMITIVETYPE PrimitiveType,
UINT MinVertexIndex,
@@ -642,7 +644,7 @@ NineDevice9_DrawIndexedPrimitiveUP( struct NineDevice9 *This,
const void *pVertexStreamZeroData,
UINT VertexStreamZeroStride );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_ProcessVertices( struct NineDevice9 *This,
UINT SrcStartIndex,
UINT DestIndex,
@@ -651,175 +653,175 @@ NineDevice9_ProcessVertices( struct NineDevice9 *This,
IDirect3DVertexDeclaration9 *pVertexDecl,
DWORD Flags );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_CreateVertexDeclaration( struct NineDevice9 *This,
const D3DVERTEXELEMENT9 *pVertexElements,
IDirect3DVertexDeclaration9 **ppDecl );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetVertexDeclaration( struct NineDevice9 *This,
IDirect3DVertexDeclaration9 *pDecl );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetVertexDeclaration( struct NineDevice9 *This,
IDirect3DVertexDeclaration9 **ppDecl );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetFVF( struct NineDevice9 *This,
DWORD FVF );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetFVF( struct NineDevice9 *This,
DWORD *pFVF );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_CreateVertexShader( struct NineDevice9 *This,
const DWORD *pFunction,
IDirect3DVertexShader9 **ppShader );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetVertexShader( struct NineDevice9 *This,
IDirect3DVertexShader9 *pShader );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetVertexShader( struct NineDevice9 *This,
IDirect3DVertexShader9 **ppShader );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetVertexShaderConstantF( struct NineDevice9 *This,
UINT StartRegister,
const float *pConstantData,
UINT Vector4fCount );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetVertexShaderConstantF( struct NineDevice9 *This,
UINT StartRegister,
float *pConstantData,
UINT Vector4fCount );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetVertexShaderConstantI( struct NineDevice9 *This,
UINT StartRegister,
const int *pConstantData,
UINT Vector4iCount );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetVertexShaderConstantI( struct NineDevice9 *This,
UINT StartRegister,
int *pConstantData,
UINT Vector4iCount );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetVertexShaderConstantB( struct NineDevice9 *This,
UINT StartRegister,
const BOOL *pConstantData,
UINT BoolCount );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetVertexShaderConstantB( struct NineDevice9 *This,
UINT StartRegister,
BOOL *pConstantData,
UINT BoolCount );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetStreamSource( struct NineDevice9 *This,
UINT StreamNumber,
IDirect3DVertexBuffer9 *pStreamData,
UINT OffsetInBytes,
UINT Stride );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetStreamSource( struct NineDevice9 *This,
UINT StreamNumber,
IDirect3DVertexBuffer9 **ppStreamData,
UINT *pOffsetInBytes,
UINT *pStride );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetStreamSourceFreq( struct NineDevice9 *This,
UINT StreamNumber,
UINT Setting );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetStreamSourceFreq( struct NineDevice9 *This,
UINT StreamNumber,
UINT *pSetting );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetIndices( struct NineDevice9 *This,
IDirect3DIndexBuffer9 *pIndexData );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetIndices( struct NineDevice9 *This,
IDirect3DIndexBuffer9 **ppIndexData /*,
UINT *pBaseVertexIndex */ );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_CreatePixelShader( struct NineDevice9 *This,
const DWORD *pFunction,
IDirect3DPixelShader9 **ppShader );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetPixelShader( struct NineDevice9 *This,
IDirect3DPixelShader9 *pShader );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetPixelShader( struct NineDevice9 *This,
IDirect3DPixelShader9 **ppShader );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetPixelShaderConstantF( struct NineDevice9 *This,
UINT StartRegister,
const float *pConstantData,
UINT Vector4fCount );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetPixelShaderConstantF( struct NineDevice9 *This,
UINT StartRegister,
float *pConstantData,
UINT Vector4fCount );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetPixelShaderConstantI( struct NineDevice9 *This,
UINT StartRegister,
const int *pConstantData,
UINT Vector4iCount );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetPixelShaderConstantI( struct NineDevice9 *This,
UINT StartRegister,
int *pConstantData,
UINT Vector4iCount );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_SetPixelShaderConstantB( struct NineDevice9 *This,
UINT StartRegister,
const BOOL *pConstantData,
UINT BoolCount );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_GetPixelShaderConstantB( struct NineDevice9 *This,
UINT StartRegister,
BOOL *pConstantData,
UINT BoolCount );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_DrawRectPatch( struct NineDevice9 *This,
UINT Handle,
const float *pNumSegs,
const D3DRECTPATCH_INFO *pRectPatchInfo );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_DrawTriPatch( struct NineDevice9 *This,
UINT Handle,
const float *pNumSegs,
const D3DTRIPATCH_INFO *pTriPatchInfo );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_DeletePatch( struct NineDevice9 *This,
UINT Handle );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9_CreateQuery( struct NineDevice9 *This,
D3DQUERYTYPE Type,
IDirect3DQuery9 **ppQuery );
diff --git a/src/gallium/state_trackers/nine/device9ex.c b/src/gallium/state_trackers/nine/device9ex.c
index 11244b1bedf..fd4272468cb 100644
--- a/src/gallium/state_trackers/nine/device9ex.c
+++ b/src/gallium/state_trackers/nine/device9ex.c
@@ -62,7 +62,7 @@ NineDevice9Ex_dtor( struct NineDevice9Ex *This )
NineDevice9_dtor(&This->base);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_SetConvolutionMonoKernel( struct NineDevice9Ex *This,
UINT width,
UINT height,
@@ -72,7 +72,7 @@ NineDevice9Ex_SetConvolutionMonoKernel( struct NineDevice9Ex *This,
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_ComposeRects( struct NineDevice9Ex *This,
IDirect3DSurface9 *pSrc,
IDirect3DSurface9 *pDst,
@@ -86,7 +86,7 @@ NineDevice9Ex_ComposeRects( struct NineDevice9Ex *This,
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_PresentEx( struct NineDevice9Ex *This,
const RECT *pSourceRect,
const RECT *pDestRect,
@@ -111,28 +111,28 @@ NineDevice9Ex_PresentEx( struct NineDevice9Ex *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_GetGPUThreadPriority( struct NineDevice9Ex *This,
INT *pPriority )
{
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_SetGPUThreadPriority( struct NineDevice9Ex *This,
INT Priority )
{
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_WaitForVBlank( struct NineDevice9Ex *This,
UINT iSwapChain )
{
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_CheckResourceResidency( struct NineDevice9Ex *This,
IDirect3DResource9 **pResourceArray,
UINT32 NumResources )
@@ -140,21 +140,21 @@ NineDevice9Ex_CheckResourceResidency( struct NineDevice9Ex *This,
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_SetMaximumFrameLatency( struct NineDevice9Ex *This,
UINT MaxLatency )
{
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_GetMaximumFrameLatency( struct NineDevice9Ex *This,
UINT *pMaxLatency )
{
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_CheckDeviceState( struct NineDevice9Ex *This,
HWND hDestinationWindow )
{
@@ -173,7 +173,7 @@ NineDevice9Ex_CheckDeviceState( struct NineDevice9Ex *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_CreateRenderTargetEx( struct NineDevice9Ex *This,
UINT Width,
UINT Height,
@@ -188,7 +188,7 @@ NineDevice9Ex_CreateRenderTargetEx( struct NineDevice9Ex *This,
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_CreateOffscreenPlainSurfaceEx( struct NineDevice9Ex *This,
UINT Width,
UINT Height,
@@ -201,7 +201,7 @@ NineDevice9Ex_CreateOffscreenPlainSurfaceEx( struct NineDevice9Ex *This,
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_CreateDepthStencilSurfaceEx( struct NineDevice9Ex *This,
UINT Width,
UINT Height,
@@ -216,7 +216,7 @@ NineDevice9Ex_CreateDepthStencilSurfaceEx( struct NineDevice9Ex *This,
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_ResetEx( struct NineDevice9Ex *This,
D3DPRESENT_PARAMETERS *pPresentationParameters,
D3DDISPLAYMODEEX *pFullscreenDisplayMode )
@@ -241,7 +241,7 @@ NineDevice9Ex_ResetEx( struct NineDevice9Ex *This,
return hr;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_Reset( struct NineDevice9Ex *This,
D3DPRESENT_PARAMETERS *pPresentationParameters )
{
@@ -267,7 +267,7 @@ NineDevice9Ex_Reset( struct NineDevice9Ex *This,
return hr;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_GetDisplayModeEx( struct NineDevice9Ex *This,
UINT iSwapChain,
D3DDISPLAYMODEEX *pMode,
@@ -284,7 +284,7 @@ NineDevice9Ex_GetDisplayModeEx( struct NineDevice9Ex *This,
return NineSwapChain9Ex_GetDisplayModeEx(swapchain, pMode, pRotation);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_TestCooperativeLevel( struct NineDevice9Ex *This )
{
return D3D_OK;
diff --git a/src/gallium/state_trackers/nine/device9ex.h b/src/gallium/state_trackers/nine/device9ex.h
index 1c7e57e0974..3306f238b59 100644
--- a/src/gallium/state_trackers/nine/device9ex.h
+++ b/src/gallium/state_trackers/nine/device9ex.h
@@ -47,14 +47,14 @@ NineDevice9Ex_new( struct pipe_screen *pScreen,
struct NineDevice9Ex **ppOut,
int minorVersionNum );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_SetConvolutionMonoKernel( struct NineDevice9Ex *This,
UINT width,
UINT height,
float *rows,
float *columns );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_ComposeRects( struct NineDevice9Ex *This,
IDirect3DSurface9 *pSrc,
IDirect3DSurface9 *pDst,
@@ -65,7 +65,7 @@ NineDevice9Ex_ComposeRects( struct NineDevice9Ex *This,
int Xoffset,
int Yoffset );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_PresentEx( struct NineDevice9Ex *This,
const RECT *pSourceRect,
const RECT *pDestRect,
@@ -73,43 +73,43 @@ NineDevice9Ex_PresentEx( struct NineDevice9Ex *This,
const RGNDATA *pDirtyRegion,
DWORD dwFlags );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_Present( struct NineDevice9Ex *This,
const RECT *pSourceRect,
const RECT *pDestRect,
HWND hDestWindowOverride,
const RGNDATA *pDirtyRegion );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_GetGPUThreadPriority( struct NineDevice9Ex *This,
INT *pPriority );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_SetGPUThreadPriority( struct NineDevice9Ex *This,
INT Priority );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_WaitForVBlank( struct NineDevice9Ex *This,
UINT iSwapChain );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_CheckResourceResidency( struct NineDevice9Ex *This,
IDirect3DResource9 **pResourceArray,
UINT32 NumResources );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_SetMaximumFrameLatency( struct NineDevice9Ex *This,
UINT MaxLatency );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_GetMaximumFrameLatency( struct NineDevice9Ex *This,
UINT *pMaxLatency );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_CheckDeviceState( struct NineDevice9Ex *This,
HWND hDestinationWindow );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_CreateRenderTargetEx( struct NineDevice9Ex *This,
UINT Width,
UINT Height,
@@ -121,7 +121,7 @@ NineDevice9Ex_CreateRenderTargetEx( struct NineDevice9Ex *This,
HANDLE *pSharedHandle,
DWORD Usage );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_CreateOffscreenPlainSurfaceEx( struct NineDevice9Ex *This,
UINT Width,
UINT Height,
@@ -131,7 +131,7 @@ NineDevice9Ex_CreateOffscreenPlainSurfaceEx( struct NineDevice9Ex *This,
HANDLE *pSharedHandle,
DWORD Usage );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_CreateDepthStencilSurfaceEx( struct NineDevice9Ex *This,
UINT Width,
UINT Height,
@@ -143,22 +143,22 @@ NineDevice9Ex_CreateDepthStencilSurfaceEx( struct NineDevice9Ex *This,
HANDLE *pSharedHandle,
DWORD Usage );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_ResetEx( struct NineDevice9Ex *This,
D3DPRESENT_PARAMETERS *pPresentationParameters,
D3DDISPLAYMODEEX *pFullscreenDisplayMode );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_Reset( struct NineDevice9Ex *This,
D3DPRESENT_PARAMETERS *pPresentationParameters );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_GetDisplayModeEx( struct NineDevice9Ex *This,
UINT iSwapChain,
D3DDISPLAYMODEEX *pMode,
D3DDISPLAYROTATION *pRotation );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Ex_TestCooperativeLevel( struct NineDevice9Ex *This );
#endif /* _NINE_DEVICE9EX_H_ */
diff --git a/src/gallium/state_trackers/nine/device9video.c b/src/gallium/state_trackers/nine/device9video.c
index 65cc6a05c68..2e8e94a26e7 100644
--- a/src/gallium/state_trackers/nine/device9video.c
+++ b/src/gallium/state_trackers/nine/device9video.c
@@ -24,7 +24,7 @@
#define DBG_CHANNEL DBG_DEVICEVIDEO
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Video_GetContentProtectionCaps( struct NineDevice9Video *This,
const GUID *pCryptoType,
const GUID *pDecodeProfile,
@@ -33,7 +33,7 @@ NineDevice9Video_GetContentProtectionCaps( struct NineDevice9Video *This,
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Video_CreateAuthenticatedChannel( struct NineDevice9Video *This,
D3DAUTHENTICATEDCHANNELTYPE ChannelType,
IDirect3DAuthenticatedChannel9 **ppAuthenticatedChannel,
@@ -42,7 +42,7 @@ NineDevice9Video_CreateAuthenticatedChannel( struct NineDevice9Video *This,
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Video_CreateCryptoSession( struct NineDevice9Video *This,
const GUID *pCryptoType,
const GUID *pDecodeProfile,
diff --git a/src/gallium/state_trackers/nine/device9video.h b/src/gallium/state_trackers/nine/device9video.h
index fc2faeb624a..d77fd5ec57a 100644
--- a/src/gallium/state_trackers/nine/device9video.h
+++ b/src/gallium/state_trackers/nine/device9video.h
@@ -35,19 +35,19 @@ NineDevice9Video( void *data )
return (struct NineDevice9Video *)data;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Video_GetContentProtectionCaps( struct NineDevice9Video *This,
const GUID *pCryptoType,
const GUID *pDecodeProfile,
D3DCONTENTPROTECTIONCAPS *pCaps );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Video_CreateAuthenticatedChannel( struct NineDevice9Video *This,
D3DAUTHENTICATEDCHANNELTYPE ChannelType,
IDirect3DAuthenticatedChannel9 **ppAuthenticatedChannel,
HANDLE *pChannelHandle );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineDevice9Video_CreateCryptoSession( struct NineDevice9Video *This,
const GUID *pCryptoType,
const GUID *pDecodeProfile,
diff --git a/src/gallium/state_trackers/nine/indexbuffer9.c b/src/gallium/state_trackers/nine/indexbuffer9.c
index 401fe75e95f..0a31d7e9716 100644
--- a/src/gallium/state_trackers/nine/indexbuffer9.c
+++ b/src/gallium/state_trackers/nine/indexbuffer9.c
@@ -85,7 +85,7 @@ NineIndexBuffer9_GetResource( struct NineIndexBuffer9 *This )
return NineBuffer9_GetResource(&This->base);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineIndexBuffer9_Lock( struct NineIndexBuffer9 *This,
UINT OffsetToLock,
UINT SizeToLock,
@@ -95,13 +95,13 @@ NineIndexBuffer9_Lock( struct NineIndexBuffer9 *This,
return NineBuffer9_Lock(&This->base, OffsetToLock, SizeToLock, ppbData, Flags);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineIndexBuffer9_Unlock( struct NineIndexBuffer9 *This )
{
return NineBuffer9_Unlock(&This->base);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineIndexBuffer9_GetDesc( struct NineIndexBuffer9 *This,
D3DINDEXBUFFER_DESC *pDesc )
{
diff --git a/src/gallium/state_trackers/nine/indexbuffer9.h b/src/gallium/state_trackers/nine/indexbuffer9.h
index f3274b71224..4802105c952 100644
--- a/src/gallium/state_trackers/nine/indexbuffer9.h
+++ b/src/gallium/state_trackers/nine/indexbuffer9.h
@@ -70,17 +70,17 @@ struct pipe_resource *
NineIndexBuffer9_GetResource( struct NineIndexBuffer9 *This );
/*** Direct3D public ***/
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineIndexBuffer9_Lock( struct NineIndexBuffer9 *This,
UINT OffsetToLock,
UINT SizeToLock,
void **ppbData,
DWORD Flags );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineIndexBuffer9_Unlock( struct NineIndexBuffer9 *This );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineIndexBuffer9_GetDesc( struct NineIndexBuffer9 *This,
D3DINDEXBUFFER_DESC *pDesc );
diff --git a/src/gallium/state_trackers/nine/iunknown.c b/src/gallium/state_trackers/nine/iunknown.c
index aaf17bfeda7..a28c8b2de8b 100644
--- a/src/gallium/state_trackers/nine/iunknown.c
+++ b/src/gallium/state_trackers/nine/iunknown.c
@@ -51,7 +51,7 @@ NineUnknown_dtor( struct NineUnknown *This )
FREE(This);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineUnknown_QueryInterface( struct NineUnknown *This,
REFIID riid,
void **ppvObject )
@@ -75,7 +75,7 @@ NineUnknown_QueryInterface( struct NineUnknown *This,
return E_NOINTERFACE;
}
-ULONG WINAPI
+ULONG NINE_WINAPI
NineUnknown_AddRef( struct NineUnknown *This )
{
ULONG r;
@@ -94,7 +94,7 @@ NineUnknown_AddRef( struct NineUnknown *This )
return r;
}
-ULONG WINAPI
+ULONG NINE_WINAPI
NineUnknown_Release( struct NineUnknown *This )
{
if (This->forward)
@@ -117,7 +117,7 @@ NineUnknown_Release( struct NineUnknown *This )
return r;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineUnknown_GetDevice( struct NineUnknown *This,
IDirect3DDevice9 **ppDevice )
{
diff --git a/src/gallium/state_trackers/nine/iunknown.h b/src/gallium/state_trackers/nine/iunknown.h
index 628d984553e..b8de6be9663 100644
--- a/src/gallium/state_trackers/nine/iunknown.h
+++ b/src/gallium/state_trackers/nine/iunknown.h
@@ -28,6 +28,7 @@
#include "util/u_memory.h"
#include "guid.h"
+#include "nine_flags.h"
#include "nine_debug.h"
#include "nine_quirk.h"
@@ -77,18 +78,18 @@ NineUnknown_dtor( struct NineUnknown *This );
/*** Direct3D public methods ***/
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineUnknown_QueryInterface( struct NineUnknown *This,
REFIID riid,
void **ppvObject );
-ULONG WINAPI
+ULONG NINE_WINAPI
NineUnknown_AddRef( struct NineUnknown *This );
-ULONG WINAPI
+ULONG NINE_WINAPI
NineUnknown_Release( struct NineUnknown *This );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineUnknown_GetDevice( struct NineUnknown *This,
IDirect3DDevice9 **ppDevice );
diff --git a/src/gallium/state_trackers/nine/nine_ff.c b/src/gallium/state_trackers/nine/nine_ff.c
index a5466a7bdd4..d5daabd6661 100644
--- a/src/gallium/state_trackers/nine/nine_ff.c
+++ b/src/gallium/state_trackers/nine/nine_ff.c
@@ -24,8 +24,6 @@
#include "util/u_hash_table.h"
#include "util/u_upload_mgr.h"
-#define NINE_TGSI_LAZY_DEVS 1
-
#define DBG_CHANNEL DBG_FF
#define NINE_FF_NUM_VS_CONST 256
@@ -319,15 +317,11 @@ ureg_normalize3(struct ureg_program *ureg,
struct ureg_dst dst, struct ureg_src src,
struct ureg_dst tmp)
{
-#ifdef NINE_TGSI_LAZY_DEVS
struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
ureg_DP3(ureg, tmp_x, src, src);
ureg_RSQ(ureg, tmp_x, _X(tmp));
ureg_MUL(ureg, dst, src, _X(tmp));
-#else
- ureg_NRM(ureg, dst, src);
-#endif
}
static void *
@@ -549,34 +543,22 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
*/
if (key->vertexpointsize) {
struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
-#ifdef NINE_TGSI_LAZY_DEVS
- struct ureg_dst tmp_clamp = ureg_DECL_temporary(ureg);
-
- ureg_MAX(ureg, tmp_clamp, vs->aPsz, _XXXX(cPsz1));
- ureg_MIN(ureg, oPsz, ureg_src(tmp_clamp), _YYYY(cPsz1));
- ureg_release_temporary(ureg, tmp_clamp);
-#else
- ureg_CLAMP(ureg, oPsz, vs->aPsz, _XXXX(cPsz1), _YYYY(cPsz1));
-#endif
+ ureg_MAX(ureg, tmp_x, _XXXX(vs->aPsz), _XXXX(cPsz1));
+ ureg_MIN(ureg, oPsz, _X(tmp), _YYYY(cPsz1));
} else if (key->pointscale) {
struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
struct ureg_src cPsz2 = ureg_DECL_constant(ureg, 27);
ureg_DP3(ureg, tmp_x, ureg_src(r[1]), ureg_src(r[1]));
- ureg_SQRT(ureg, tmp_y, _X(tmp));
+ ureg_RSQ(ureg, tmp_y, _X(tmp));
+ ureg_MUL(ureg, tmp_y, _Y(tmp), _X(tmp));
+ ureg_CMP(ureg, tmp_y, ureg_negate(_Y(tmp)), _Y(tmp), ureg_imm1f(ureg, 0.0f));
ureg_MAD(ureg, tmp_x, _Y(tmp), _YYYY(cPsz2), _XXXX(cPsz2));
ureg_MAD(ureg, tmp_x, _Y(tmp), _X(tmp), _WWWW(cPsz1));
ureg_RCP(ureg, tmp_x, ureg_src(tmp));
ureg_MUL(ureg, tmp_x, ureg_src(tmp), _ZZZZ(cPsz1));
-#ifdef NINE_TGSI_LAZY_DEVS
- struct ureg_dst tmp_clamp = ureg_DECL_temporary(ureg);
-
- ureg_MAX(ureg, tmp_clamp, _X(tmp), _XXXX(cPsz1));
- ureg_MIN(ureg, oPsz, ureg_src(tmp_clamp), _YYYY(cPsz1));
- ureg_release_temporary(ureg, tmp_clamp);
-#else
- ureg_CLAMP(ureg, oPsz, _X(tmp), _XXXX(cPsz1), _YYYY(cPsz1));
-#endif
+ ureg_MAX(ureg, tmp_x, _X(tmp), _XXXX(cPsz1));
+ ureg_MIN(ureg, oPsz, _X(tmp), _YYYY(cPsz1));
}
for (i = 0; i < 8; ++i) {
diff --git a/src/gallium/state_trackers/nine/nine_flags.h b/src/gallium/state_trackers/nine/nine_flags.h
new file mode 100644
index 00000000000..61686a7f60f
--- /dev/null
+++ b/src/gallium/state_trackers/nine/nine_flags.h
@@ -0,0 +1,14 @@
+
+
+#ifndef _NINE_FLAGS_H_
+#define _NINE_FLAGS_H_
+
+#include "pipe/p_compiler.h"
+
+/* Incoming 32 bits calls are 4-byte aligned.
+ * We need to realign them to be able to use
+ * SSE and to work with other libraries (llvm, etc)
+ */
+#define NINE_WINAPI WINAPI PIPE_ALIGN_STACK
+
+#endif /* _NINE_FLAGS_H_ */ \ No newline at end of file
diff --git a/src/gallium/state_trackers/nine/nine_lock.c b/src/gallium/state_trackers/nine/nine_lock.c
index 42cbb0589c4..6e15cc190e8 100644
--- a/src/gallium/state_trackers/nine/nine_lock.c
+++ b/src/gallium/state_trackers/nine/nine_lock.c
@@ -49,7 +49,7 @@
/* Global mutex as described by MSDN */
pipe_static_mutex(d3dlock_global);
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockAuthenticatedChannel9_GetCertificateSize( struct NineAuthenticatedChannel9 *This,
UINT *pCertificateSize )
{
@@ -60,7 +60,7 @@ LockAuthenticatedChannel9_GetCertificateSize( struct NineAuthenticatedChannel9 *
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockAuthenticatedChannel9_GetCertificate( struct NineAuthenticatedChannel9 *This,
UINT CertifacteSize,
BYTE *ppCertificate )
@@ -72,7 +72,7 @@ LockAuthenticatedChannel9_GetCertificate( struct NineAuthenticatedChannel9 *This
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockAuthenticatedChannel9_NegotiateKeyExchange( struct NineAuthenticatedChannel9 *This,
UINT DataSize,
void *pData )
@@ -84,7 +84,7 @@ LockAuthenticatedChannel9_NegotiateKeyExchange( struct NineAuthenticatedChannel9
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockAuthenticatedChannel9_Query( struct NineAuthenticatedChannel9 *This,
UINT InputSize,
const void *pInput,
@@ -98,7 +98,7 @@ LockAuthenticatedChannel9_Query( struct NineAuthenticatedChannel9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockAuthenticatedChannel9_Configure( struct NineAuthenticatedChannel9 *This,
UINT InputSize,
const void *pInput,
@@ -123,7 +123,7 @@ IDirect3DAuthenticatedChannel9Vtbl LockAuthenticatedChannel9_vtable = {
};
#if 0
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockResource9_GetDevice( struct NineResource9 *This,
IDirect3DDevice9 **ppDevice )
{
@@ -135,7 +135,7 @@ LockResource9_GetDevice( struct NineResource9 *This,
}
#endif
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockResource9_SetPrivateData( struct NineResource9 *This,
REFGUID refguid,
const void *pData,
@@ -149,7 +149,7 @@ LockResource9_SetPrivateData( struct NineResource9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockResource9_GetPrivateData( struct NineResource9 *This,
REFGUID refguid,
void *pData,
@@ -162,7 +162,7 @@ LockResource9_GetPrivateData( struct NineResource9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockResource9_FreePrivateData( struct NineResource9 *This,
REFGUID refguid )
{
@@ -173,7 +173,7 @@ LockResource9_FreePrivateData( struct NineResource9 *This,
return r;
}
-static DWORD WINAPI
+static DWORD NINE_WINAPI
LockResource9_SetPriority( struct NineResource9 *This,
DWORD PriorityNew )
{
@@ -184,7 +184,7 @@ LockResource9_SetPriority( struct NineResource9 *This,
return r;
}
-static DWORD WINAPI
+static DWORD NINE_WINAPI
LockResource9_GetPriority( struct NineResource9 *This )
{
DWORD r;
@@ -195,7 +195,7 @@ LockResource9_GetPriority( struct NineResource9 *This )
}
#if 0
-static void WINAPI
+static void NINE_WINAPI
LockResource9_PreLoad( struct NineResource9 *This )
{
pipe_mutex_lock(d3dlock_global);
@@ -205,7 +205,7 @@ LockResource9_PreLoad( struct NineResource9 *This )
#endif
#if 0
-static D3DRESOURCETYPE WINAPI
+static D3DRESOURCETYPE NINE_WINAPI
LockResource9_GetType( struct NineResource9 *This )
{
D3DRESOURCETYPE r;
@@ -216,7 +216,7 @@ LockResource9_GetType( struct NineResource9 *This )
}
#endif
-static DWORD WINAPI
+static DWORD NINE_WINAPI
LockBaseTexture9_SetLOD( struct NineBaseTexture9 *This,
DWORD LODNew )
{
@@ -227,7 +227,7 @@ LockBaseTexture9_SetLOD( struct NineBaseTexture9 *This,
return r;
}
-static DWORD WINAPI
+static DWORD NINE_WINAPI
LockBaseTexture9_GetLOD( struct NineBaseTexture9 *This )
{
DWORD r;
@@ -237,7 +237,7 @@ LockBaseTexture9_GetLOD( struct NineBaseTexture9 *This )
return r;
}
-static DWORD WINAPI
+static DWORD NINE_WINAPI
LockBaseTexture9_GetLevelCount( struct NineBaseTexture9 *This )
{
DWORD r;
@@ -247,7 +247,7 @@ LockBaseTexture9_GetLevelCount( struct NineBaseTexture9 *This )
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockBaseTexture9_SetAutoGenFilterType( struct NineBaseTexture9 *This,
D3DTEXTUREFILTERTYPE FilterType )
{
@@ -258,7 +258,7 @@ LockBaseTexture9_SetAutoGenFilterType( struct NineBaseTexture9 *This,
return r;
}
-static D3DTEXTUREFILTERTYPE WINAPI
+static D3DTEXTUREFILTERTYPE NINE_WINAPI
LockBaseTexture9_GetAutoGenFilterType( struct NineBaseTexture9 *This )
{
D3DTEXTUREFILTERTYPE r;
@@ -268,7 +268,7 @@ LockBaseTexture9_GetAutoGenFilterType( struct NineBaseTexture9 *This )
return r;
}
-static void WINAPI
+static void NINE_WINAPI
LockBaseTexture9_PreLoad( struct NineBaseTexture9 *This )
{
pipe_mutex_lock(d3dlock_global);
@@ -276,7 +276,7 @@ LockBaseTexture9_PreLoad( struct NineBaseTexture9 *This )
pipe_mutex_unlock(d3dlock_global);
}
-static void WINAPI
+static void NINE_WINAPI
LockBaseTexture9_GenerateMipSubLevels( struct NineBaseTexture9 *This )
{
pipe_mutex_lock(d3dlock_global);
@@ -284,7 +284,7 @@ LockBaseTexture9_GenerateMipSubLevels( struct NineBaseTexture9 *This )
pipe_mutex_unlock(d3dlock_global);
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockCryptoSession9_GetCertificateSize( struct NineCryptoSession9 *This,
UINT *pCertificateSize )
{
@@ -295,7 +295,7 @@ LockCryptoSession9_GetCertificateSize( struct NineCryptoSession9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockCryptoSession9_GetCertificate( struct NineCryptoSession9 *This,
UINT CertifacteSize,
BYTE *ppCertificate )
@@ -307,7 +307,7 @@ LockCryptoSession9_GetCertificate( struct NineCryptoSession9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockCryptoSession9_NegotiateKeyExchange( struct NineCryptoSession9 *This,
UINT DataSize,
void *pData )
@@ -319,7 +319,7 @@ LockCryptoSession9_NegotiateKeyExchange( struct NineCryptoSession9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockCryptoSession9_EncryptionBlt( struct NineCryptoSession9 *This,
IDirect3DSurface9 *pSrcSurface,
IDirect3DSurface9 *pDstSurface,
@@ -333,7 +333,7 @@ LockCryptoSession9_EncryptionBlt( struct NineCryptoSession9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockCryptoSession9_DecryptionBlt( struct NineCryptoSession9 *This,
IDirect3DSurface9 *pSrcSurface,
IDirect3DSurface9 *pDstSurface,
@@ -349,7 +349,7 @@ LockCryptoSession9_DecryptionBlt( struct NineCryptoSession9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockCryptoSession9_GetSurfacePitch( struct NineCryptoSession9 *This,
IDirect3DSurface9 *pSrcSurface,
UINT *pSurfacePitch )
@@ -361,7 +361,7 @@ LockCryptoSession9_GetSurfacePitch( struct NineCryptoSession9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockCryptoSession9_StartSessionKeyRefresh( struct NineCryptoSession9 *This,
void *pRandomNumber,
UINT RandomNumberSize )
@@ -373,7 +373,7 @@ LockCryptoSession9_StartSessionKeyRefresh( struct NineCryptoSession9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockCryptoSession9_FinishSessionKeyRefresh( struct NineCryptoSession9 *This )
{
HRESULT r;
@@ -383,7 +383,7 @@ LockCryptoSession9_FinishSessionKeyRefresh( struct NineCryptoSession9 *This )
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockCryptoSession9_GetEncryptionBltKey( struct NineCryptoSession9 *This,
void *pReadbackKey,
UINT KeySize )
@@ -411,7 +411,7 @@ IDirect3DCryptoSession9Vtbl LockCryptoSession9_vtable = {
};
#if 0
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockCubeTexture9_GetLevelDesc( struct NineCubeTexture9 *This,
UINT Level,
D3DSURFACE_DESC *pDesc )
@@ -425,7 +425,7 @@ LockCubeTexture9_GetLevelDesc( struct NineCubeTexture9 *This,
#endif
#if 0
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockCubeTexture9_GetCubeMapSurface( struct NineCubeTexture9 *This,
D3DCUBEMAP_FACES FaceType,
UINT Level,
@@ -439,7 +439,7 @@ LockCubeTexture9_GetCubeMapSurface( struct NineCubeTexture9 *This,
}
#endif
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockCubeTexture9_LockRect( struct NineCubeTexture9 *This,
D3DCUBEMAP_FACES FaceType,
UINT Level,
@@ -454,7 +454,7 @@ LockCubeTexture9_LockRect( struct NineCubeTexture9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockCubeTexture9_UnlockRect( struct NineCubeTexture9 *This,
D3DCUBEMAP_FACES FaceType,
UINT Level )
@@ -466,7 +466,7 @@ LockCubeTexture9_UnlockRect( struct NineCubeTexture9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockCubeTexture9_AddDirtyRect( struct NineCubeTexture9 *This,
D3DCUBEMAP_FACES FaceType,
const RECT *pDirtyRect )
@@ -503,7 +503,7 @@ IDirect3DCubeTexture9Vtbl LockCubeTexture9_vtable = {
(void *)LockCubeTexture9_AddDirtyRect
};
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_TestCooperativeLevel( struct NineDevice9 *This )
{
HRESULT r;
@@ -513,7 +513,7 @@ LockDevice9_TestCooperativeLevel( struct NineDevice9 *This )
return r;
}
-static UINT WINAPI
+static UINT NINE_WINAPI
LockDevice9_GetAvailableTextureMem( struct NineDevice9 *This )
{
UINT r;
@@ -523,7 +523,7 @@ LockDevice9_GetAvailableTextureMem( struct NineDevice9 *This )
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_EvictManagedResources( struct NineDevice9 *This )
{
HRESULT r;
@@ -533,7 +533,7 @@ LockDevice9_EvictManagedResources( struct NineDevice9 *This )
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetDirect3D( struct NineDevice9 *This,
IDirect3D9 **ppD3D9 )
{
@@ -545,7 +545,7 @@ LockDevice9_GetDirect3D( struct NineDevice9 *This,
}
#if 0
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetDeviceCaps( struct NineDevice9 *This,
D3DCAPS9 *pCaps )
{
@@ -557,7 +557,7 @@ LockDevice9_GetDeviceCaps( struct NineDevice9 *This,
}
#endif
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetDisplayMode( struct NineDevice9 *This,
UINT iSwapChain,
D3DDISPLAYMODE *pMode )
@@ -570,7 +570,7 @@ LockDevice9_GetDisplayMode( struct NineDevice9 *This,
}
#if 0
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetCreationParameters( struct NineDevice9 *This,
D3DDEVICE_CREATION_PARAMETERS *pParameters )
{
@@ -582,7 +582,7 @@ LockDevice9_GetCreationParameters( struct NineDevice9 *This,
}
#endif
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_SetCursorProperties( struct NineDevice9 *This,
UINT XHotSpot,
UINT YHotSpot,
@@ -595,7 +595,7 @@ LockDevice9_SetCursorProperties( struct NineDevice9 *This,
return r;
}
-static void WINAPI
+static void NINE_WINAPI
LockDevice9_SetCursorPosition( struct NineDevice9 *This,
int X,
int Y,
@@ -606,7 +606,7 @@ LockDevice9_SetCursorPosition( struct NineDevice9 *This,
pipe_mutex_unlock(d3dlock_global);
}
-static BOOL WINAPI
+static BOOL NINE_WINAPI
LockDevice9_ShowCursor( struct NineDevice9 *This,
BOOL bShow )
{
@@ -617,7 +617,7 @@ LockDevice9_ShowCursor( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_CreateAdditionalSwapChain( struct NineDevice9 *This,
D3DPRESENT_PARAMETERS *pPresentationParameters,
IDirect3DSwapChain9 **pSwapChain )
@@ -629,7 +629,7 @@ LockDevice9_CreateAdditionalSwapChain( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetSwapChain( struct NineDevice9 *This,
UINT iSwapChain,
IDirect3DSwapChain9 **pSwapChain )
@@ -641,7 +641,7 @@ LockDevice9_GetSwapChain( struct NineDevice9 *This,
return r;
}
-static UINT WINAPI
+static UINT NINE_WINAPI
LockDevice9_GetNumberOfSwapChains( struct NineDevice9 *This )
{
UINT r;
@@ -651,7 +651,7 @@ LockDevice9_GetNumberOfSwapChains( struct NineDevice9 *This )
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_Reset( struct NineDevice9 *This,
D3DPRESENT_PARAMETERS *pPresentationParameters )
{
@@ -662,7 +662,7 @@ LockDevice9_Reset( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_Present( struct NineDevice9 *This,
const RECT *pSourceRect,
const RECT *pDestRect,
@@ -676,7 +676,7 @@ LockDevice9_Present( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetBackBuffer( struct NineDevice9 *This,
UINT iSwapChain,
UINT iBackBuffer,
@@ -690,7 +690,7 @@ LockDevice9_GetBackBuffer( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetRasterStatus( struct NineDevice9 *This,
UINT iSwapChain,
D3DRASTER_STATUS *pRasterStatus )
@@ -702,7 +702,7 @@ LockDevice9_GetRasterStatus( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_SetDialogBoxMode( struct NineDevice9 *This,
BOOL bEnableDialogs )
{
@@ -713,7 +713,7 @@ LockDevice9_SetDialogBoxMode( struct NineDevice9 *This,
return r;
}
-static void WINAPI
+static void NINE_WINAPI
LockDevice9_SetGammaRamp( struct NineDevice9 *This,
UINT iSwapChain,
DWORD Flags,
@@ -724,7 +724,7 @@ LockDevice9_SetGammaRamp( struct NineDevice9 *This,
pipe_mutex_unlock(d3dlock_global);
}
-static void WINAPI
+static void NINE_WINAPI
LockDevice9_GetGammaRamp( struct NineDevice9 *This,
UINT iSwapChain,
D3DGAMMARAMP *pRamp )
@@ -734,7 +734,7 @@ LockDevice9_GetGammaRamp( struct NineDevice9 *This,
pipe_mutex_unlock(d3dlock_global);
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_CreateTexture( struct NineDevice9 *This,
UINT Width,
UINT Height,
@@ -752,7 +752,7 @@ LockDevice9_CreateTexture( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_CreateVolumeTexture( struct NineDevice9 *This,
UINT Width,
UINT Height,
@@ -771,7 +771,7 @@ LockDevice9_CreateVolumeTexture( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_CreateCubeTexture( struct NineDevice9 *This,
UINT EdgeLength,
UINT Levels,
@@ -788,7 +788,7 @@ LockDevice9_CreateCubeTexture( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_CreateVertexBuffer( struct NineDevice9 *This,
UINT Length,
DWORD Usage,
@@ -804,7 +804,7 @@ LockDevice9_CreateVertexBuffer( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_CreateIndexBuffer( struct NineDevice9 *This,
UINT Length,
DWORD Usage,
@@ -820,7 +820,7 @@ LockDevice9_CreateIndexBuffer( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_CreateRenderTarget( struct NineDevice9 *This,
UINT Width,
UINT Height,
@@ -838,7 +838,7 @@ LockDevice9_CreateRenderTarget( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_CreateDepthStencilSurface( struct NineDevice9 *This,
UINT Width,
UINT Height,
@@ -856,7 +856,7 @@ LockDevice9_CreateDepthStencilSurface( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_UpdateSurface( struct NineDevice9 *This,
IDirect3DSurface9 *pSourceSurface,
const RECT *pSourceRect,
@@ -870,7 +870,7 @@ LockDevice9_UpdateSurface( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_UpdateTexture( struct NineDevice9 *This,
IDirect3DBaseTexture9 *pSourceTexture,
IDirect3DBaseTexture9 *pDestinationTexture )
@@ -882,7 +882,7 @@ LockDevice9_UpdateTexture( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetRenderTargetData( struct NineDevice9 *This,
IDirect3DSurface9 *pRenderTarget,
IDirect3DSurface9 *pDestSurface )
@@ -894,7 +894,7 @@ LockDevice9_GetRenderTargetData( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetFrontBufferData( struct NineDevice9 *This,
UINT iSwapChain,
IDirect3DSurface9 *pDestSurface )
@@ -906,7 +906,7 @@ LockDevice9_GetFrontBufferData( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_StretchRect( struct NineDevice9 *This,
IDirect3DSurface9 *pSourceSurface,
const RECT *pSourceRect,
@@ -921,7 +921,7 @@ LockDevice9_StretchRect( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_ColorFill( struct NineDevice9 *This,
IDirect3DSurface9 *pSurface,
const RECT *pRect,
@@ -934,7 +934,7 @@ LockDevice9_ColorFill( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_CreateOffscreenPlainSurface( struct NineDevice9 *This,
UINT Width,
UINT Height,
@@ -950,7 +950,7 @@ LockDevice9_CreateOffscreenPlainSurface( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_SetRenderTarget( struct NineDevice9 *This,
DWORD RenderTargetIndex,
IDirect3DSurface9 *pRenderTarget )
@@ -962,7 +962,7 @@ LockDevice9_SetRenderTarget( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetRenderTarget( struct NineDevice9 *This,
DWORD RenderTargetIndex,
IDirect3DSurface9 **ppRenderTarget )
@@ -974,7 +974,7 @@ LockDevice9_GetRenderTarget( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_SetDepthStencilSurface( struct NineDevice9 *This,
IDirect3DSurface9 *pNewZStencil )
{
@@ -985,7 +985,7 @@ LockDevice9_SetDepthStencilSurface( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetDepthStencilSurface( struct NineDevice9 *This,
IDirect3DSurface9 **ppZStencilSurface )
{
@@ -996,7 +996,7 @@ LockDevice9_GetDepthStencilSurface( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_BeginScene( struct NineDevice9 *This )
{
HRESULT r;
@@ -1006,7 +1006,7 @@ LockDevice9_BeginScene( struct NineDevice9 *This )
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_EndScene( struct NineDevice9 *This )
{
HRESULT r;
@@ -1016,7 +1016,7 @@ LockDevice9_EndScene( struct NineDevice9 *This )
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_Clear( struct NineDevice9 *This,
DWORD Count,
const D3DRECT *pRects,
@@ -1032,7 +1032,7 @@ LockDevice9_Clear( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_SetTransform( struct NineDevice9 *This,
D3DTRANSFORMSTATETYPE State,
const D3DMATRIX *pMatrix )
@@ -1044,7 +1044,7 @@ LockDevice9_SetTransform( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetTransform( struct NineDevice9 *This,
D3DTRANSFORMSTATETYPE State,
D3DMATRIX *pMatrix )
@@ -1056,7 +1056,7 @@ LockDevice9_GetTransform( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_MultiplyTransform( struct NineDevice9 *This,
D3DTRANSFORMSTATETYPE State,
const D3DMATRIX *pMatrix )
@@ -1068,7 +1068,7 @@ LockDevice9_MultiplyTransform( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_SetViewport( struct NineDevice9 *This,
const D3DVIEWPORT9 *pViewport )
{
@@ -1079,7 +1079,7 @@ LockDevice9_SetViewport( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetViewport( struct NineDevice9 *This,
D3DVIEWPORT9 *pViewport )
{
@@ -1090,7 +1090,7 @@ LockDevice9_GetViewport( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_SetMaterial( struct NineDevice9 *This,
const D3DMATERIAL9 *pMaterial )
{
@@ -1101,7 +1101,7 @@ LockDevice9_SetMaterial( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetMaterial( struct NineDevice9 *This,
D3DMATERIAL9 *pMaterial )
{
@@ -1112,7 +1112,7 @@ LockDevice9_GetMaterial( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_SetLight( struct NineDevice9 *This,
DWORD Index,
const D3DLIGHT9 *pLight )
@@ -1124,7 +1124,7 @@ LockDevice9_SetLight( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetLight( struct NineDevice9 *This,
DWORD Index,
D3DLIGHT9 *pLight )
@@ -1136,7 +1136,7 @@ LockDevice9_GetLight( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_LightEnable( struct NineDevice9 *This,
DWORD Index,
BOOL Enable )
@@ -1148,7 +1148,7 @@ LockDevice9_LightEnable( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetLightEnable( struct NineDevice9 *This,
DWORD Index,
BOOL *pEnable )
@@ -1160,7 +1160,7 @@ LockDevice9_GetLightEnable( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_SetClipPlane( struct NineDevice9 *This,
DWORD Index,
const float *pPlane )
@@ -1172,7 +1172,7 @@ LockDevice9_SetClipPlane( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetClipPlane( struct NineDevice9 *This,
DWORD Index,
float *pPlane )
@@ -1184,7 +1184,7 @@ LockDevice9_GetClipPlane( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_SetRenderState( struct NineDevice9 *This,
D3DRENDERSTATETYPE State,
DWORD Value )
@@ -1196,7 +1196,7 @@ LockDevice9_SetRenderState( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetRenderState( struct NineDevice9 *This,
D3DRENDERSTATETYPE State,
DWORD *pValue )
@@ -1208,7 +1208,7 @@ LockDevice9_GetRenderState( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_CreateStateBlock( struct NineDevice9 *This,
D3DSTATEBLOCKTYPE Type,
IDirect3DStateBlock9 **ppSB )
@@ -1220,7 +1220,7 @@ LockDevice9_CreateStateBlock( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_BeginStateBlock( struct NineDevice9 *This )
{
HRESULT r;
@@ -1230,7 +1230,7 @@ LockDevice9_BeginStateBlock( struct NineDevice9 *This )
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_EndStateBlock( struct NineDevice9 *This,
IDirect3DStateBlock9 **ppSB )
{
@@ -1241,7 +1241,7 @@ LockDevice9_EndStateBlock( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_SetClipStatus( struct NineDevice9 *This,
const D3DCLIPSTATUS9 *pClipStatus )
{
@@ -1252,7 +1252,7 @@ LockDevice9_SetClipStatus( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetClipStatus( struct NineDevice9 *This,
D3DCLIPSTATUS9 *pClipStatus )
{
@@ -1263,7 +1263,7 @@ LockDevice9_GetClipStatus( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetTexture( struct NineDevice9 *This,
DWORD Stage,
IDirect3DBaseTexture9 **ppTexture )
@@ -1275,7 +1275,7 @@ LockDevice9_GetTexture( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_SetTexture( struct NineDevice9 *This,
DWORD Stage,
IDirect3DBaseTexture9 *pTexture )
@@ -1287,7 +1287,7 @@ LockDevice9_SetTexture( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetTextureStageState( struct NineDevice9 *This,
DWORD Stage,
D3DTEXTURESTAGESTATETYPE Type,
@@ -1300,7 +1300,7 @@ LockDevice9_GetTextureStageState( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_SetTextureStageState( struct NineDevice9 *This,
DWORD Stage,
D3DTEXTURESTAGESTATETYPE Type,
@@ -1313,7 +1313,7 @@ LockDevice9_SetTextureStageState( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetSamplerState( struct NineDevice9 *This,
DWORD Sampler,
D3DSAMPLERSTATETYPE Type,
@@ -1326,7 +1326,7 @@ LockDevice9_GetSamplerState( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_SetSamplerState( struct NineDevice9 *This,
DWORD Sampler,
D3DSAMPLERSTATETYPE Type,
@@ -1339,7 +1339,7 @@ LockDevice9_SetSamplerState( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_ValidateDevice( struct NineDevice9 *This,
DWORD *pNumPasses )
{
@@ -1350,7 +1350,7 @@ LockDevice9_ValidateDevice( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_SetPaletteEntries( struct NineDevice9 *This,
UINT PaletteNumber,
const PALETTEENTRY *pEntries )
@@ -1362,7 +1362,7 @@ LockDevice9_SetPaletteEntries( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetPaletteEntries( struct NineDevice9 *This,
UINT PaletteNumber,
PALETTEENTRY *pEntries )
@@ -1374,7 +1374,7 @@ LockDevice9_GetPaletteEntries( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_SetCurrentTexturePalette( struct NineDevice9 *This,
UINT PaletteNumber )
{
@@ -1385,7 +1385,7 @@ LockDevice9_SetCurrentTexturePalette( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetCurrentTexturePalette( struct NineDevice9 *This,
UINT *PaletteNumber )
{
@@ -1396,7 +1396,7 @@ LockDevice9_GetCurrentTexturePalette( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_SetScissorRect( struct NineDevice9 *This,
const RECT *pRect )
{
@@ -1407,7 +1407,7 @@ LockDevice9_SetScissorRect( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetScissorRect( struct NineDevice9 *This,
RECT *pRect )
{
@@ -1418,7 +1418,7 @@ LockDevice9_GetScissorRect( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_SetSoftwareVertexProcessing( struct NineDevice9 *This,
BOOL bSoftware )
{
@@ -1429,7 +1429,7 @@ LockDevice9_SetSoftwareVertexProcessing( struct NineDevice9 *This,
return r;
}
-static BOOL WINAPI
+static BOOL NINE_WINAPI
LockDevice9_GetSoftwareVertexProcessing( struct NineDevice9 *This )
{
BOOL r;
@@ -1439,7 +1439,7 @@ LockDevice9_GetSoftwareVertexProcessing( struct NineDevice9 *This )
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_SetNPatchMode( struct NineDevice9 *This,
float nSegments )
{
@@ -1450,7 +1450,7 @@ LockDevice9_SetNPatchMode( struct NineDevice9 *This,
return r;
}
-static float WINAPI
+static float NINE_WINAPI
LockDevice9_GetNPatchMode( struct NineDevice9 *This )
{
float r;
@@ -1460,7 +1460,7 @@ LockDevice9_GetNPatchMode( struct NineDevice9 *This )
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_DrawPrimitive( struct NineDevice9 *This,
D3DPRIMITIVETYPE PrimitiveType,
UINT StartVertex,
@@ -1473,7 +1473,7 @@ LockDevice9_DrawPrimitive( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_DrawIndexedPrimitive( struct NineDevice9 *This,
D3DPRIMITIVETYPE PrimitiveType,
INT BaseVertexIndex,
@@ -1489,7 +1489,7 @@ LockDevice9_DrawIndexedPrimitive( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_DrawPrimitiveUP( struct NineDevice9 *This,
D3DPRIMITIVETYPE PrimitiveType,
UINT PrimitiveCount,
@@ -1503,7 +1503,7 @@ LockDevice9_DrawPrimitiveUP( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_DrawIndexedPrimitiveUP( struct NineDevice9 *This,
D3DPRIMITIVETYPE PrimitiveType,
UINT MinVertexIndex,
@@ -1521,7 +1521,7 @@ LockDevice9_DrawIndexedPrimitiveUP( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_ProcessVertices( struct NineDevice9 *This,
UINT SrcStartIndex,
UINT DestIndex,
@@ -1537,7 +1537,7 @@ LockDevice9_ProcessVertices( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_CreateVertexDeclaration( struct NineDevice9 *This,
const D3DVERTEXELEMENT9 *pVertexElements,
IDirect3DVertexDeclaration9 **ppDecl )
@@ -1549,7 +1549,7 @@ LockDevice9_CreateVertexDeclaration( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_SetVertexDeclaration( struct NineDevice9 *This,
IDirect3DVertexDeclaration9 *pDecl )
{
@@ -1560,7 +1560,7 @@ LockDevice9_SetVertexDeclaration( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetVertexDeclaration( struct NineDevice9 *This,
IDirect3DVertexDeclaration9 **ppDecl )
{
@@ -1571,7 +1571,7 @@ LockDevice9_GetVertexDeclaration( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_SetFVF( struct NineDevice9 *This,
DWORD FVF )
{
@@ -1582,7 +1582,7 @@ LockDevice9_SetFVF( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetFVF( struct NineDevice9 *This,
DWORD *pFVF )
{
@@ -1593,7 +1593,7 @@ LockDevice9_GetFVF( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_CreateVertexShader( struct NineDevice9 *This,
const DWORD *pFunction,
IDirect3DVertexShader9 **ppShader )
@@ -1605,7 +1605,7 @@ LockDevice9_CreateVertexShader( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_SetVertexShader( struct NineDevice9 *This,
IDirect3DVertexShader9 *pShader )
{
@@ -1616,7 +1616,7 @@ LockDevice9_SetVertexShader( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetVertexShader( struct NineDevice9 *This,
IDirect3DVertexShader9 **ppShader )
{
@@ -1627,7 +1627,7 @@ LockDevice9_GetVertexShader( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_SetVertexShaderConstantF( struct NineDevice9 *This,
UINT StartRegister,
const float *pConstantData,
@@ -1640,7 +1640,7 @@ LockDevice9_SetVertexShaderConstantF( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetVertexShaderConstantF( struct NineDevice9 *This,
UINT StartRegister,
float *pConstantData,
@@ -1653,7 +1653,7 @@ LockDevice9_GetVertexShaderConstantF( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_SetVertexShaderConstantI( struct NineDevice9 *This,
UINT StartRegister,
const int *pConstantData,
@@ -1666,7 +1666,7 @@ LockDevice9_SetVertexShaderConstantI( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetVertexShaderConstantI( struct NineDevice9 *This,
UINT StartRegister,
int *pConstantData,
@@ -1679,7 +1679,7 @@ LockDevice9_GetVertexShaderConstantI( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_SetVertexShaderConstantB( struct NineDevice9 *This,
UINT StartRegister,
const BOOL *pConstantData,
@@ -1692,7 +1692,7 @@ LockDevice9_SetVertexShaderConstantB( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetVertexShaderConstantB( struct NineDevice9 *This,
UINT StartRegister,
BOOL *pConstantData,
@@ -1705,7 +1705,7 @@ LockDevice9_GetVertexShaderConstantB( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_SetStreamSource( struct NineDevice9 *This,
UINT StreamNumber,
IDirect3DVertexBuffer9 *pStreamData,
@@ -1719,7 +1719,7 @@ LockDevice9_SetStreamSource( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetStreamSource( struct NineDevice9 *This,
UINT StreamNumber,
IDirect3DVertexBuffer9 **ppStreamData,
@@ -1733,7 +1733,7 @@ LockDevice9_GetStreamSource( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_SetStreamSourceFreq( struct NineDevice9 *This,
UINT StreamNumber,
UINT Setting )
@@ -1745,7 +1745,7 @@ LockDevice9_SetStreamSourceFreq( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetStreamSourceFreq( struct NineDevice9 *This,
UINT StreamNumber,
UINT *pSetting )
@@ -1757,7 +1757,7 @@ LockDevice9_GetStreamSourceFreq( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_SetIndices( struct NineDevice9 *This,
IDirect3DIndexBuffer9 *pIndexData )
{
@@ -1768,7 +1768,7 @@ LockDevice9_SetIndices( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetIndices( struct NineDevice9 *This,
IDirect3DIndexBuffer9 **ppIndexData )
{
@@ -1779,7 +1779,7 @@ LockDevice9_GetIndices( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_CreatePixelShader( struct NineDevice9 *This,
const DWORD *pFunction,
IDirect3DPixelShader9 **ppShader )
@@ -1791,7 +1791,7 @@ LockDevice9_CreatePixelShader( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_SetPixelShader( struct NineDevice9 *This,
IDirect3DPixelShader9 *pShader )
{
@@ -1802,7 +1802,7 @@ LockDevice9_SetPixelShader( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetPixelShader( struct NineDevice9 *This,
IDirect3DPixelShader9 **ppShader )
{
@@ -1813,7 +1813,7 @@ LockDevice9_GetPixelShader( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_SetPixelShaderConstantF( struct NineDevice9 *This,
UINT StartRegister,
const float *pConstantData,
@@ -1826,7 +1826,7 @@ LockDevice9_SetPixelShaderConstantF( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetPixelShaderConstantF( struct NineDevice9 *This,
UINT StartRegister,
float *pConstantData,
@@ -1839,7 +1839,7 @@ LockDevice9_GetPixelShaderConstantF( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_SetPixelShaderConstantI( struct NineDevice9 *This,
UINT StartRegister,
const int *pConstantData,
@@ -1852,7 +1852,7 @@ LockDevice9_SetPixelShaderConstantI( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetPixelShaderConstantI( struct NineDevice9 *This,
UINT StartRegister,
int *pConstantData,
@@ -1865,7 +1865,7 @@ LockDevice9_GetPixelShaderConstantI( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_SetPixelShaderConstantB( struct NineDevice9 *This,
UINT StartRegister,
const BOOL *pConstantData,
@@ -1878,7 +1878,7 @@ LockDevice9_SetPixelShaderConstantB( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_GetPixelShaderConstantB( struct NineDevice9 *This,
UINT StartRegister,
BOOL *pConstantData,
@@ -1891,7 +1891,7 @@ LockDevice9_GetPixelShaderConstantB( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_DrawRectPatch( struct NineDevice9 *This,
UINT Handle,
const float *pNumSegs,
@@ -1904,7 +1904,7 @@ LockDevice9_DrawRectPatch( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_DrawTriPatch( struct NineDevice9 *This,
UINT Handle,
const float *pNumSegs,
@@ -1917,7 +1917,7 @@ LockDevice9_DrawTriPatch( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_DeletePatch( struct NineDevice9 *This,
UINT Handle )
{
@@ -1928,7 +1928,7 @@ LockDevice9_DeletePatch( struct NineDevice9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9_CreateQuery( struct NineDevice9 *This,
D3DQUERYTYPE Type,
IDirect3DQuery9 **ppQuery )
@@ -2062,7 +2062,7 @@ IDirect3DDevice9Vtbl LockDevice9_vtable = {
(void *)LockDevice9_CreateQuery
};
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9Ex_SetConvolutionMonoKernel( struct NineDevice9Ex *This,
UINT width,
UINT height,
@@ -2076,7 +2076,7 @@ LockDevice9Ex_SetConvolutionMonoKernel( struct NineDevice9Ex *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9Ex_ComposeRects( struct NineDevice9Ex *This,
IDirect3DSurface9 *pSrc,
IDirect3DSurface9 *pDst,
@@ -2094,7 +2094,7 @@ LockDevice9Ex_ComposeRects( struct NineDevice9Ex *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9Ex_PresentEx( struct NineDevice9Ex *This,
const RECT *pSourceRect,
const RECT *pDestRect,
@@ -2109,7 +2109,7 @@ LockDevice9Ex_PresentEx( struct NineDevice9Ex *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9Ex_GetGPUThreadPriority( struct NineDevice9Ex *This,
INT *pPriority )
{
@@ -2120,7 +2120,7 @@ LockDevice9Ex_GetGPUThreadPriority( struct NineDevice9Ex *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9Ex_SetGPUThreadPriority( struct NineDevice9Ex *This,
INT Priority )
{
@@ -2131,7 +2131,7 @@ LockDevice9Ex_SetGPUThreadPriority( struct NineDevice9Ex *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9Ex_WaitForVBlank( struct NineDevice9Ex *This,
UINT iSwapChain )
{
@@ -2142,7 +2142,7 @@ LockDevice9Ex_WaitForVBlank( struct NineDevice9Ex *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9Ex_CheckResourceResidency( struct NineDevice9Ex *This,
IDirect3DResource9 **pResourceArray,
UINT32 NumResources )
@@ -2154,7 +2154,7 @@ LockDevice9Ex_CheckResourceResidency( struct NineDevice9Ex *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9Ex_SetMaximumFrameLatency( struct NineDevice9Ex *This,
UINT MaxLatency )
{
@@ -2165,7 +2165,7 @@ LockDevice9Ex_SetMaximumFrameLatency( struct NineDevice9Ex *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9Ex_GetMaximumFrameLatency( struct NineDevice9Ex *This,
UINT *pMaxLatency )
{
@@ -2176,7 +2176,7 @@ LockDevice9Ex_GetMaximumFrameLatency( struct NineDevice9Ex *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9Ex_CheckDeviceState( struct NineDevice9Ex *This,
HWND hDestinationWindow )
{
@@ -2187,7 +2187,7 @@ LockDevice9Ex_CheckDeviceState( struct NineDevice9Ex *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9Ex_CreateRenderTargetEx( struct NineDevice9Ex *This,
UINT Width,
UINT Height,
@@ -2206,7 +2206,7 @@ LockDevice9Ex_CreateRenderTargetEx( struct NineDevice9Ex *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9Ex_CreateOffscreenPlainSurfaceEx( struct NineDevice9Ex *This,
UINT Width,
UINT Height,
@@ -2223,7 +2223,7 @@ LockDevice9Ex_CreateOffscreenPlainSurfaceEx( struct NineDevice9Ex *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9Ex_CreateDepthStencilSurfaceEx( struct NineDevice9Ex *This,
UINT Width,
UINT Height,
@@ -2242,7 +2242,7 @@ LockDevice9Ex_CreateDepthStencilSurfaceEx( struct NineDevice9Ex *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9Ex_ResetEx( struct NineDevice9Ex *This,
D3DPRESENT_PARAMETERS *pPresentationParameters,
D3DDISPLAYMODEEX *pFullscreenDisplayMode )
@@ -2254,7 +2254,7 @@ LockDevice9Ex_ResetEx( struct NineDevice9Ex *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9Ex_GetDisplayModeEx( struct NineDevice9Ex *This,
UINT iSwapChain,
D3DDISPLAYMODEEX *pMode,
@@ -2404,7 +2404,7 @@ IDirect3DDevice9ExVtbl LockDevice9Ex_vtable = {
(void *)LockDevice9Ex_GetDisplayModeEx
};
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9Video_GetContentProtectionCaps( struct NineDevice9Video *This,
const GUID *pCryptoType,
const GUID *pDecodeProfile,
@@ -2417,7 +2417,7 @@ LockDevice9Video_GetContentProtectionCaps( struct NineDevice9Video *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9Video_CreateAuthenticatedChannel( struct NineDevice9Video *This,
D3DAUTHENTICATEDCHANNELTYPE ChannelType,
IDirect3DAuthenticatedChannel9 **ppAuthenticatedChannel,
@@ -2430,7 +2430,7 @@ LockDevice9Video_CreateAuthenticatedChannel( struct NineDevice9Video *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockDevice9Video_CreateCryptoSession( struct NineDevice9Video *This,
const GUID *pCryptoType,
const GUID *pDecodeProfile,
@@ -2453,7 +2453,7 @@ IDirect3DDevice9VideoVtbl LockDevice9Video_vtable = {
(void *)LockDevice9Video_CreateCryptoSession
};
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockIndexBuffer9_Lock( struct NineIndexBuffer9 *This,
UINT OffsetToLock,
UINT SizeToLock,
@@ -2467,7 +2467,7 @@ LockIndexBuffer9_Lock( struct NineIndexBuffer9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockIndexBuffer9_Unlock( struct NineIndexBuffer9 *This )
{
HRESULT r;
@@ -2478,7 +2478,7 @@ LockIndexBuffer9_Unlock( struct NineIndexBuffer9 *This )
}
#if 0
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockIndexBuffer9_GetDesc( struct NineIndexBuffer9 *This,
D3DINDEXBUFFER_DESC *pDesc )
{
@@ -2508,7 +2508,7 @@ IDirect3DIndexBuffer9Vtbl LockIndexBuffer9_vtable = {
};
#if 0
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockPixelShader9_GetDevice( struct NinePixelShader9 *This,
IDirect3DDevice9 **ppDevice )
{
@@ -2520,7 +2520,7 @@ LockPixelShader9_GetDevice( struct NinePixelShader9 *This,
}
#endif
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockPixelShader9_GetFunction( struct NinePixelShader9 *This,
void *pData,
UINT *pSizeOfData )
@@ -2541,7 +2541,7 @@ IDirect3DPixelShader9Vtbl LockPixelShader9_vtable = {
};
#if 0
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockQuery9_GetDevice( struct NineQuery9 *This,
IDirect3DDevice9 **ppDevice )
{
@@ -2554,7 +2554,7 @@ LockQuery9_GetDevice( struct NineQuery9 *This,
#endif
#if 0
-static D3DQUERYTYPE WINAPI
+static D3DQUERYTYPE NINE_WINAPI
LockQuery9_GetType( struct NineQuery9 *This )
{
D3DQUERYTYPE r;
@@ -2566,7 +2566,7 @@ LockQuery9_GetType( struct NineQuery9 *This )
#endif
#if 0
-static DWORD WINAPI
+static DWORD NINE_WINAPI
LockQuery9_GetDataSize( struct NineQuery9 *This )
{
DWORD r;
@@ -2577,7 +2577,7 @@ LockQuery9_GetDataSize( struct NineQuery9 *This )
}
#endif
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockQuery9_Issue( struct NineQuery9 *This,
DWORD dwIssueFlags )
{
@@ -2588,7 +2588,7 @@ LockQuery9_Issue( struct NineQuery9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockQuery9_GetData( struct NineQuery9 *This,
void *pData,
DWORD dwSize,
@@ -2613,7 +2613,7 @@ IDirect3DQuery9Vtbl LockQuery9_vtable = {
};
#if 0
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockStateBlock9_GetDevice( struct NineStateBlock9 *This,
IDirect3DDevice9 **ppDevice )
{
@@ -2625,7 +2625,7 @@ LockStateBlock9_GetDevice( struct NineStateBlock9 *This,
}
#endif
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockStateBlock9_Capture( struct NineStateBlock9 *This )
{
HRESULT r;
@@ -2635,7 +2635,7 @@ LockStateBlock9_Capture( struct NineStateBlock9 *This )
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockStateBlock9_Apply( struct NineStateBlock9 *This )
{
HRESULT r;
@@ -2654,7 +2654,7 @@ IDirect3DStateBlock9Vtbl LockStateBlock9_vtable = {
(void *)LockStateBlock9_Apply
};
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockSurface9_GetContainer( struct NineSurface9 *This,
REFIID riid,
void **ppContainer )
@@ -2667,7 +2667,7 @@ LockSurface9_GetContainer( struct NineSurface9 *This,
}
#if 0
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockSurface9_GetDesc( struct NineSurface9 *This,
D3DSURFACE_DESC *pDesc )
{
@@ -2679,7 +2679,7 @@ LockSurface9_GetDesc( struct NineSurface9 *This,
}
#endif
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockSurface9_LockRect( struct NineSurface9 *This,
D3DLOCKED_RECT *pLockedRect,
const RECT *pRect,
@@ -2692,7 +2692,7 @@ LockSurface9_LockRect( struct NineSurface9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockSurface9_UnlockRect( struct NineSurface9 *This )
{
HRESULT r;
@@ -2702,7 +2702,7 @@ LockSurface9_UnlockRect( struct NineSurface9 *This )
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockSurface9_GetDC( struct NineSurface9 *This,
HDC *phdc )
{
@@ -2713,7 +2713,7 @@ LockSurface9_GetDC( struct NineSurface9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockSurface9_ReleaseDC( struct NineSurface9 *This,
HDC hdc )
{
@@ -2744,7 +2744,7 @@ IDirect3DSurface9Vtbl LockSurface9_vtable = {
(void *)LockSurface9_ReleaseDC
};
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockSwapChain9_Present( struct NineSwapChain9 *This,
const RECT *pSourceRect,
const RECT *pDestRect,
@@ -2759,7 +2759,7 @@ LockSwapChain9_Present( struct NineSwapChain9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockSwapChain9_GetFrontBufferData( struct NineSwapChain9 *This,
IDirect3DSurface9 *pDestSurface )
{
@@ -2770,7 +2770,7 @@ LockSwapChain9_GetFrontBufferData( struct NineSwapChain9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockSwapChain9_GetBackBuffer( struct NineSwapChain9 *This,
UINT iBackBuffer,
D3DBACKBUFFER_TYPE Type,
@@ -2783,7 +2783,7 @@ LockSwapChain9_GetBackBuffer( struct NineSwapChain9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockSwapChain9_GetRasterStatus( struct NineSwapChain9 *This,
D3DRASTER_STATUS *pRasterStatus )
{
@@ -2794,7 +2794,7 @@ LockSwapChain9_GetRasterStatus( struct NineSwapChain9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockSwapChain9_GetDisplayMode( struct NineSwapChain9 *This,
D3DDISPLAYMODE *pMode )
{
@@ -2806,7 +2806,7 @@ LockSwapChain9_GetDisplayMode( struct NineSwapChain9 *This,
}
#if 0
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockSwapChain9_GetDevice( struct NineSwapChain9 *This,
IDirect3DDevice9 **ppDevice )
{
@@ -2818,7 +2818,7 @@ LockSwapChain9_GetDevice( struct NineSwapChain9 *This,
}
#endif
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockSwapChain9_GetPresentParameters( struct NineSwapChain9 *This,
D3DPRESENT_PARAMETERS *pPresentationParameters )
{
@@ -2842,7 +2842,7 @@ IDirect3DSwapChain9Vtbl LockSwapChain9_vtable = {
(void *)LockSwapChain9_GetPresentParameters
};
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockSwapChain9Ex_GetLastPresentCount( struct NineSwapChain9Ex *This,
UINT *pLastPresentCount )
{
@@ -2853,7 +2853,7 @@ LockSwapChain9Ex_GetLastPresentCount( struct NineSwapChain9Ex *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockSwapChain9Ex_GetPresentStats( struct NineSwapChain9Ex *This,
D3DPRESENTSTATS *pPresentationStatistics )
{
@@ -2864,7 +2864,7 @@ LockSwapChain9Ex_GetPresentStats( struct NineSwapChain9Ex *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockSwapChain9Ex_GetDisplayModeEx( struct NineSwapChain9Ex *This,
D3DDISPLAYMODEEX *pMode,
D3DDISPLAYROTATION *pRotation )
@@ -2893,7 +2893,7 @@ IDirect3DSwapChain9ExVtbl LockSwapChain9Ex_vtable = {
};
#if 0
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockTexture9_GetLevelDesc( struct NineTexture9 *This,
UINT Level,
D3DSURFACE_DESC *pDesc )
@@ -2907,7 +2907,7 @@ LockTexture9_GetLevelDesc( struct NineTexture9 *This,
#endif
#if 0
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockTexture9_GetSurfaceLevel( struct NineTexture9 *This,
UINT Level,
IDirect3DSurface9 **ppSurfaceLevel )
@@ -2920,7 +2920,7 @@ LockTexture9_GetSurfaceLevel( struct NineTexture9 *This,
}
#endif
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockTexture9_LockRect( struct NineTexture9 *This,
UINT Level,
D3DLOCKED_RECT *pLockedRect,
@@ -2934,7 +2934,7 @@ LockTexture9_LockRect( struct NineTexture9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockTexture9_UnlockRect( struct NineTexture9 *This,
UINT Level )
{
@@ -2945,7 +2945,7 @@ LockTexture9_UnlockRect( struct NineTexture9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockTexture9_AddDirtyRect( struct NineTexture9 *This,
const RECT *pDirtyRect )
{
@@ -2981,7 +2981,7 @@ IDirect3DTexture9Vtbl LockTexture9_vtable = {
(void *)LockTexture9_AddDirtyRect
};
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockVertexBuffer9_Lock( struct NineVertexBuffer9 *This,
UINT OffsetToLock,
UINT SizeToLock,
@@ -2995,7 +2995,7 @@ LockVertexBuffer9_Lock( struct NineVertexBuffer9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockVertexBuffer9_Unlock( struct NineVertexBuffer9 *This )
{
HRESULT r;
@@ -3006,7 +3006,7 @@ LockVertexBuffer9_Unlock( struct NineVertexBuffer9 *This )
}
#if 0
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockVertexBuffer9_GetDesc( struct NineVertexBuffer9 *This,
D3DVERTEXBUFFER_DESC *pDesc )
{
@@ -3036,7 +3036,7 @@ IDirect3DVertexBuffer9Vtbl LockVertexBuffer9_vtable = {
};
#if 0
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockVertexDeclaration9_GetDevice( struct NineVertexDeclaration9 *This,
IDirect3DDevice9 **ppDevice )
{
@@ -3048,7 +3048,7 @@ LockVertexDeclaration9_GetDevice( struct NineVertexDeclaration9 *This,
}
#endif
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockVertexDeclaration9_GetDeclaration( struct NineVertexDeclaration9 *This,
D3DVERTEXELEMENT9 *pElement,
UINT *pNumElements )
@@ -3069,7 +3069,7 @@ IDirect3DVertexDeclaration9Vtbl LockVertexDeclaration9_vtable = {
};
#if 0
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockVertexShader9_GetDevice( struct NineVertexShader9 *This,
IDirect3DDevice9 **ppDevice )
{
@@ -3081,7 +3081,7 @@ LockVertexShader9_GetDevice( struct NineVertexShader9 *This,
}
#endif
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockVertexShader9_GetFunction( struct NineVertexShader9 *This,
void *pData,
UINT *pSizeOfData )
@@ -3102,7 +3102,7 @@ IDirect3DVertexShader9Vtbl LockVertexShader9_vtable = {
};
#if 0
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockVolume9_GetDevice( struct NineVolume9 *This,
IDirect3DDevice9 **ppDevice )
{
@@ -3114,7 +3114,7 @@ LockVolume9_GetDevice( struct NineVolume9 *This,
}
#endif
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockVolume9_SetPrivateData( struct NineVolume9 *This,
REFGUID refguid,
const void *pData,
@@ -3128,7 +3128,7 @@ LockVolume9_SetPrivateData( struct NineVolume9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockVolume9_GetPrivateData( struct NineVolume9 *This,
REFGUID refguid,
void *pData,
@@ -3141,7 +3141,7 @@ LockVolume9_GetPrivateData( struct NineVolume9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockVolume9_FreePrivateData( struct NineVolume9 *This,
REFGUID refguid )
{
@@ -3152,7 +3152,7 @@ LockVolume9_FreePrivateData( struct NineVolume9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockVolume9_GetContainer( struct NineVolume9 *This,
REFIID riid,
void **ppContainer )
@@ -3165,7 +3165,7 @@ LockVolume9_GetContainer( struct NineVolume9 *This,
}
#if 0
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockVolume9_GetDesc( struct NineVolume9 *This,
D3DVOLUME_DESC *pDesc )
{
@@ -3177,7 +3177,7 @@ LockVolume9_GetDesc( struct NineVolume9 *This,
}
#endif
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockVolume9_LockBox( struct NineVolume9 *This,
D3DLOCKED_BOX *pLockedVolume,
const D3DBOX *pBox,
@@ -3190,7 +3190,7 @@ LockVolume9_LockBox( struct NineVolume9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockVolume9_UnlockBox( struct NineVolume9 *This )
{
HRESULT r;
@@ -3215,7 +3215,7 @@ IDirect3DVolume9Vtbl LockVolume9_vtable = {
};
#if 0
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockVolumeTexture9_GetLevelDesc( struct NineVolumeTexture9 *This,
UINT Level,
D3DVOLUME_DESC *pDesc )
@@ -3229,7 +3229,7 @@ LockVolumeTexture9_GetLevelDesc( struct NineVolumeTexture9 *This,
#endif
#if 0
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockVolumeTexture9_GetVolumeLevel( struct NineVolumeTexture9 *This,
UINT Level,
IDirect3DVolume9 **ppVolumeLevel )
@@ -3242,7 +3242,7 @@ LockVolumeTexture9_GetVolumeLevel( struct NineVolumeTexture9 *This,
}
#endif
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockVolumeTexture9_LockBox( struct NineVolumeTexture9 *This,
UINT Level,
D3DLOCKED_BOX *pLockedVolume,
@@ -3256,7 +3256,7 @@ LockVolumeTexture9_LockBox( struct NineVolumeTexture9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockVolumeTexture9_UnlockBox( struct NineVolumeTexture9 *This,
UINT Level )
{
@@ -3267,7 +3267,7 @@ LockVolumeTexture9_UnlockBox( struct NineVolumeTexture9 *This,
return r;
}
-static HRESULT WINAPI
+static HRESULT NINE_WINAPI
LockVolumeTexture9_AddDirtyBox( struct NineVolumeTexture9 *This,
const D3DBOX *pDirtyBox )
{
diff --git a/src/gallium/state_trackers/nine/nine_pipe.h b/src/gallium/state_trackers/nine/nine_pipe.h
index 86117866ed5..1ffce7dc1d7 100644
--- a/src/gallium/state_trackers/nine/nine_pipe.h
+++ b/src/gallium/state_trackers/nine/nine_pipe.h
@@ -324,7 +324,8 @@ d3d9_to_pipe_format_checked(struct pipe_screen *screen,
enum pipe_texture_target target,
unsigned sample_count,
unsigned bindings,
- boolean srgb)
+ boolean srgb,
+ boolean bypass_check)
{
enum pipe_format result;
@@ -335,7 +336,10 @@ d3d9_to_pipe_format_checked(struct pipe_screen *screen,
if (srgb)
result = util_format_srgb(result);
- if (format_check_internal(result))
+ /* bypass_check: Used for D3DPOOL_SCRATCH, which
+ * isn't limited to the formats supported by the
+ * device. */
+ if (bypass_check || format_check_internal(result))
return result;
/* fallback to another format for formats
diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c
index 6f94e378984..f8af969f283 100644
--- a/src/gallium/state_trackers/nine/nine_state.c
+++ b/src/gallium/state_trackers/nine/nine_state.c
@@ -23,6 +23,7 @@
#include "device9.h"
#include "basetexture9.h"
+#include "buffer9.h"
#include "indexbuffer9.h"
#include "surface9.h"
#include "vertexdeclaration9.h"
@@ -935,6 +936,16 @@ validate_textures(struct NineDevice9 *device)
}
}
+static void
+update_managed_buffers(struct NineDevice9 *device)
+{
+ struct NineBuffer9 *buf, *ptr;
+ LIST_FOR_EACH_ENTRY_SAFE(buf, ptr, &device->update_buffers, managed.list) {
+ list_delinit(&buf->managed.list);
+ NineBuffer9_Upload(buf);
+ }
+}
+
void
nine_update_state_framebuffer_clear(struct NineDevice9 *device)
{
@@ -962,6 +973,7 @@ nine_update_state(struct NineDevice9 *device)
* may be dirty anyway, even if no texture bindings changed.
*/
validate_textures(device); /* may clobber state */
+ update_managed_buffers(device);
/* ff_update may change VS/PS dirty bits */
if (unlikely(!state->programmable_vs || !state->ps))
diff --git a/src/gallium/state_trackers/nine/nineexoverlayextension.c b/src/gallium/state_trackers/nine/nineexoverlayextension.c
index 2253f8d9789..6a4f287e9a3 100644
--- a/src/gallium/state_trackers/nine/nineexoverlayextension.c
+++ b/src/gallium/state_trackers/nine/nineexoverlayextension.c
@@ -24,7 +24,7 @@
#define DBG_CHANNEL DBG_OVERLAYEXTENSION
-HRESULT WINAPI
+HRESULT NINE_WINAPI
Nine9ExOverlayExtension_CheckDeviceOverlayType( struct Nine9ExOverlayExtension *This,
UINT Adapter,
D3DDEVTYPE DevType,
diff --git a/src/gallium/state_trackers/nine/nineexoverlayextension.h b/src/gallium/state_trackers/nine/nineexoverlayextension.h
index 1616ed0532c..f528f1d3ca9 100644
--- a/src/gallium/state_trackers/nine/nineexoverlayextension.h
+++ b/src/gallium/state_trackers/nine/nineexoverlayextension.h
@@ -35,7 +35,7 @@ Nine9ExOverlayExtension( void *data )
return (struct Nine9ExOverlayExtension *)data;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
Nine9ExOverlayExtension_CheckDeviceOverlayType( struct Nine9ExOverlayExtension *This,
UINT Adapter,
D3DDEVTYPE DevType,
diff --git a/src/gallium/state_trackers/nine/pixelshader9.c b/src/gallium/state_trackers/nine/pixelshader9.c
index 00be67f8955..541919c130c 100644
--- a/src/gallium/state_trackers/nine/pixelshader9.c
+++ b/src/gallium/state_trackers/nine/pixelshader9.c
@@ -116,7 +116,7 @@ NinePixelShader9_dtor( struct NinePixelShader9 *This )
NineUnknown_dtor(&This->base);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NinePixelShader9_GetFunction( struct NinePixelShader9 *This,
void *pData,
UINT *pSizeOfData )
diff --git a/src/gallium/state_trackers/nine/pixelshader9.h b/src/gallium/state_trackers/nine/pixelshader9.h
index 6b431813a81..1fef1c47dd1 100644
--- a/src/gallium/state_trackers/nine/pixelshader9.h
+++ b/src/gallium/state_trackers/nine/pixelshader9.h
@@ -126,7 +126,7 @@ NinePixelShader9_ctor( struct NinePixelShader9 *,
void
NinePixelShader9_dtor( struct NinePixelShader9 * );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NinePixelShader9_GetFunction( struct NinePixelShader9 *This,
void *pData,
UINT *pSizeOfData );
diff --git a/src/gallium/state_trackers/nine/query9.c b/src/gallium/state_trackers/nine/query9.c
index 3afa9007f61..39ac831ba77 100644
--- a/src/gallium/state_trackers/nine/query9.c
+++ b/src/gallium/state_trackers/nine/query9.c
@@ -143,19 +143,19 @@ NineQuery9_dtor( struct NineQuery9 *This )
NineUnknown_dtor(&This->base);
}
-D3DQUERYTYPE WINAPI
+D3DQUERYTYPE NINE_WINAPI
NineQuery9_GetType( struct NineQuery9 *This )
{
return This->type;
}
-DWORD WINAPI
+DWORD NINE_WINAPI
NineQuery9_GetDataSize( struct NineQuery9 *This )
{
return This->result_size;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineQuery9_Issue( struct NineQuery9 *This,
DWORD dwIssueFlags )
{
@@ -197,7 +197,7 @@ union nine_query_result
UINT64 u64;
};
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineQuery9_GetData( struct NineQuery9 *This,
void *pData,
DWORD dwSize,
diff --git a/src/gallium/state_trackers/nine/query9.h b/src/gallium/state_trackers/nine/query9.h
index 9cc1e317055..6397a4c0f6c 100644
--- a/src/gallium/state_trackers/nine/query9.h
+++ b/src/gallium/state_trackers/nine/query9.h
@@ -63,17 +63,17 @@ NineQuery9_ctor( struct NineQuery9 *,
void
NineQuery9_dtor( struct NineQuery9 * );
-D3DQUERYTYPE WINAPI
+D3DQUERYTYPE NINE_WINAPI
NineQuery9_GetType( struct NineQuery9 *This );
-DWORD WINAPI
+DWORD NINE_WINAPI
NineQuery9_GetDataSize( struct NineQuery9 *This );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineQuery9_Issue( struct NineQuery9 *This,
DWORD dwIssueFlags );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineQuery9_GetData( struct NineQuery9 *This,
void *pData,
DWORD dwSize,
diff --git a/src/gallium/state_trackers/nine/resource9.c b/src/gallium/state_trackers/nine/resource9.c
index b929c50a83c..d01c2225520 100644
--- a/src/gallium/state_trackers/nine/resource9.c
+++ b/src/gallium/state_trackers/nine/resource9.c
@@ -138,7 +138,7 @@ NineResource9_GetPool( struct NineResource9 *This )
return This->pool;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineResource9_SetPrivateData( struct NineResource9 *This,
REFGUID refguid,
const void *pData,
@@ -187,7 +187,7 @@ NineResource9_SetPrivateData( struct NineResource9 *This,
return D3DERR_DRIVERINTERNALERROR;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineResource9_GetPrivateData( struct NineResource9 *This,
REFGUID refguid,
void *pData,
@@ -220,7 +220,7 @@ NineResource9_GetPrivateData( struct NineResource9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineResource9_FreePrivateData( struct NineResource9 *This,
REFGUID refguid )
{
@@ -239,7 +239,7 @@ NineResource9_FreePrivateData( struct NineResource9 *This,
return D3D_OK;
}
-DWORD WINAPI
+DWORD NINE_WINAPI
NineResource9_SetPriority( struct NineResource9 *This,
DWORD PriorityNew )
{
@@ -254,7 +254,7 @@ NineResource9_SetPriority( struct NineResource9 *This,
return prev;
}
-DWORD WINAPI
+DWORD NINE_WINAPI
NineResource9_GetPriority( struct NineResource9 *This )
{
if (This->pool != D3DPOOL_MANAGED || This->type == D3DRTYPE_SURFACE)
@@ -264,7 +264,7 @@ NineResource9_GetPriority( struct NineResource9 *This )
}
/* NOTE: Don't forget to adjust locked vtable if you change this ! */
-void WINAPI
+void NINE_WINAPI
NineResource9_PreLoad( struct NineResource9 *This )
{
if (This->pool != D3DPOOL_MANAGED)
@@ -275,7 +275,7 @@ NineResource9_PreLoad( struct NineResource9 *This )
*/
}
-D3DRESOURCETYPE WINAPI
+D3DRESOURCETYPE NINE_WINAPI
NineResource9_GetType( struct NineResource9 *This )
{
return This->type;
diff --git a/src/gallium/state_trackers/nine/resource9.h b/src/gallium/state_trackers/nine/resource9.h
index 8122257b7a7..24a241c00cd 100644
--- a/src/gallium/state_trackers/nine/resource9.h
+++ b/src/gallium/state_trackers/nine/resource9.h
@@ -76,34 +76,34 @@ NineResource9_GetPool( struct NineResource9 *This );
/*** Direct3D public methods ***/
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineResource9_SetPrivateData( struct NineResource9 *This,
REFGUID refguid,
const void *pData,
DWORD SizeOfData,
DWORD Flags );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineResource9_GetPrivateData( struct NineResource9 *This,
REFGUID refguid,
void *pData,
DWORD *pSizeOfData );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineResource9_FreePrivateData( struct NineResource9 *This,
REFGUID refguid );
-DWORD WINAPI
+DWORD NINE_WINAPI
NineResource9_SetPriority( struct NineResource9 *This,
DWORD PriorityNew );
-DWORD WINAPI
+DWORD NINE_WINAPI
NineResource9_GetPriority( struct NineResource9 *This );
-void WINAPI
+void NINE_WINAPI
NineResource9_PreLoad( struct NineResource9 *This );
-D3DRESOURCETYPE WINAPI
+D3DRESOURCETYPE NINE_WINAPI
NineResource9_GetType( struct NineResource9 *This );
#endif /* _NINE_RESOURCE9_H_ */
diff --git a/src/gallium/state_trackers/nine/stateblock9.c b/src/gallium/state_trackers/nine/stateblock9.c
index 0d1a04b657a..cc368b7f725 100644
--- a/src/gallium/state_trackers/nine/stateblock9.c
+++ b/src/gallium/state_trackers/nine/stateblock9.c
@@ -86,7 +86,7 @@ NineStateBlock9_dtor( struct NineStateBlock9 *This )
*/
static void
nine_state_copy_common(struct nine_state *dst,
- const struct nine_state *src,
+ struct nine_state *src,
struct nine_state *mask, /* aliases either src or dst */
const boolean apply,
struct nine_range_pool *pool)
@@ -267,17 +267,41 @@ nine_state_copy_common(struct nine_state *dst,
}
}
if (mask->changed.group & NINE_STATE_FF_LIGHTING) {
- if (dst->ff.num_lights < mask->ff.num_lights) {
+ unsigned num_lights = MAX2(dst->ff.num_lights, src->ff.num_lights);
+ /* Can happen in Capture() if device state has created new lights after
+ * the stateblock was created.
+ * Can happen in Apply() if the stateblock had recorded the creation of
+ * new lights. */
+ if (dst->ff.num_lights < num_lights) {
dst->ff.light = REALLOC(dst->ff.light,
dst->ff.num_lights * sizeof(D3DLIGHT9),
- mask->ff.num_lights * sizeof(D3DLIGHT9));
- for (i = dst->ff.num_lights; i < mask->ff.num_lights; ++i) {
- memset(&dst->ff.light[i], 0, sizeof(D3DLIGHT9));
- dst->ff.light[i].Type = (D3DLIGHTTYPE)NINED3DLIGHT_INVALID;
+ num_lights * sizeof(D3DLIGHT9));
+ memset(&dst->ff.light[dst->ff.num_lights], 0, (num_lights - dst->ff.num_lights) * sizeof(D3DLIGHT9));
+ /* if mask == dst, a Type of 0 will trigger
+ * "dst->ff.light[i] = src->ff.light[i];" later,
+ * which is what we want in that case. */
+ if (mask != dst) {
+ for (i = src->ff.num_lights; i < num_lights; ++i)
+ src->ff.light[i].Type = (D3DLIGHTTYPE)NINED3DLIGHT_INVALID;
}
- dst->ff.num_lights = mask->ff.num_lights;
+ dst->ff.num_lights = num_lights;
}
- for (i = 0; i < mask->ff.num_lights; ++i)
+ /* Can happen in Capture() if the stateblock had recorded the creation of
+ * new lights.
+ * Can happen in Apply() if device state has created new lights after
+ * the stateblock was created. */
+ if (src->ff.num_lights < num_lights) {
+ src->ff.light = REALLOC(src->ff.light,
+ src->ff.num_lights * sizeof(D3DLIGHT9),
+ num_lights * sizeof(D3DLIGHT9));
+ memset(&src->ff.light[src->ff.num_lights], 0, (num_lights - src->ff.num_lights) * sizeof(D3DLIGHT9));
+ for (i = src->ff.num_lights; i < num_lights; ++i)
+ src->ff.light[i].Type = (D3DLIGHTTYPE)NINED3DLIGHT_INVALID;
+ src->ff.num_lights = num_lights;
+ }
+ /* Note: mask is either src or dst, so at this point src, dst and mask
+ * have num_lights lights. */
+ for (i = 0; i < num_lights; ++i)
if (mask->ff.light[i].Type != NINED3DLIGHT_INVALID)
dst->ff.light[i] = src->ff.light[i];
@@ -446,7 +470,7 @@ nine_state_copy_common_all(struct nine_state *dst,
/* Capture those bits of current device state that have been changed between
* BeginStateBlock and EndStateBlock.
*/
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineStateBlock9_Capture( struct NineStateBlock9 *This )
{
struct nine_state *dst = &This->state;
@@ -476,7 +500,7 @@ NineStateBlock9_Capture( struct NineStateBlock9 *This )
}
/* Set state managed by this StateBlock as current device state. */
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineStateBlock9_Apply( struct NineStateBlock9 *This )
{
struct nine_state *dst = &This->base.device->state;
diff --git a/src/gallium/state_trackers/nine/stateblock9.h b/src/gallium/state_trackers/nine/stateblock9.h
index a580c6a2302..ffcc2d99a53 100644
--- a/src/gallium/state_trackers/nine/stateblock9.h
+++ b/src/gallium/state_trackers/nine/stateblock9.h
@@ -62,10 +62,10 @@ NineStateBlock9_ctor( struct NineStateBlock9 *,
void
NineStateBlock9_dtor( struct NineStateBlock9 * );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineStateBlock9_Capture( struct NineStateBlock9 *This );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineStateBlock9_Apply( struct NineStateBlock9 *This );
#endif /* _NINE_STATEBLOCK9_H_ */
diff --git a/src/gallium/state_trackers/nine/surface9.c b/src/gallium/state_trackers/nine/surface9.c
index f88b75c3dd7..4c4234bfe27 100644
--- a/src/gallium/state_trackers/nine/surface9.c
+++ b/src/gallium/state_trackers/nine/surface9.c
@@ -97,7 +97,8 @@ NineSurface9_ctor( struct NineSurface9 *This,
This->base.info.target,
This->base.info.nr_samples,
This->base.info.bind,
- FALSE);
+ FALSE,
+ pDesc->Pool == D3DPOOL_SCRATCH);
if (pDesc->Usage & D3DUSAGE_RENDERTARGET)
This->base.info.bind |= PIPE_BIND_RENDER_TARGET;
@@ -116,13 +117,10 @@ NineSurface9_ctor( struct NineSurface9 *This,
return E_OUTOFMEMORY;
}
- if (pDesc->Pool == D3DPOOL_SYSTEMMEM) {
- This->base.info.usage = PIPE_USAGE_STAGING;
- assert(!pResource);
- } else {
- if (pResource && (pDesc->Usage & D3DUSAGE_DYNAMIC))
- pResource->flags |= NINE_RESOURCE_FLAG_LOCKABLE;
- }
+ assert(pDesc->Pool != D3DPOOL_SYSTEMMEM || !pResource);
+
+ if (pResource && (pDesc->Usage & D3DUSAGE_DYNAMIC))
+ pResource->flags |= NINE_RESOURCE_FLAG_LOCKABLE;
hr = NineResource9_ctor(&This->base, pParams, pResource, FALSE, D3DRTYPE_SURFACE,
pDesc->Pool, pDesc->Usage);
@@ -227,7 +225,7 @@ NineSurface9_Dump( struct NineSurface9 *This )
}
#endif /* DEBUG */
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineSurface9_GetContainer( struct NineSurface9 *This,
REFIID riid,
void **ppContainer )
@@ -260,7 +258,7 @@ NineSurface9_MarkContainerDirty( struct NineSurface9 *This )
}
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineSurface9_GetDesc( struct NineSurface9 *This,
D3DSURFACE_DESC *pDesc )
{
@@ -315,7 +313,7 @@ NineSurface9_GetSystemMemPointer(struct NineSurface9 *This, int x, int y)
return This->data + (y * This->stride + x_offset);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineSurface9_LockRect( struct NineSurface9 *This,
D3DLOCKED_RECT *pLockedRect,
const RECT *pRect,
@@ -426,7 +424,7 @@ NineSurface9_LockRect( struct NineSurface9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineSurface9_UnlockRect( struct NineSurface9 *This )
{
DBG("This=%p lock_count=%u\n", This, This->lock_count);
@@ -439,14 +437,14 @@ NineSurface9_UnlockRect( struct NineSurface9 *This )
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineSurface9_GetDC( struct NineSurface9 *This,
HDC *phdc )
{
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineSurface9_ReleaseDC( struct NineSurface9 *This,
HDC hdc )
{
diff --git a/src/gallium/state_trackers/nine/surface9.h b/src/gallium/state_trackers/nine/surface9.h
index 76156ae699c..7e8f2d35267 100644
--- a/src/gallium/state_trackers/nine/surface9.h
+++ b/src/gallium/state_trackers/nine/surface9.h
@@ -151,29 +151,29 @@ NineSurface9_Dump( struct NineSurface9 *This ) { }
/*** Direct3D public ***/
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineSurface9_GetContainer( struct NineSurface9 *This,
REFIID riid,
void **ppContainer );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineSurface9_GetDesc( struct NineSurface9 *This,
D3DSURFACE_DESC *pDesc );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineSurface9_LockRect( struct NineSurface9 *This,
D3DLOCKED_RECT *pLockedRect,
const RECT *pRect,
DWORD Flags );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineSurface9_UnlockRect( struct NineSurface9 *This );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineSurface9_GetDC( struct NineSurface9 *This,
HDC *phdc );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineSurface9_ReleaseDC( struct NineSurface9 *This,
HDC hdc );
diff --git a/src/gallium/state_trackers/nine/swapchain9.c b/src/gallium/state_trackers/nine/swapchain9.c
index 82d4173fbb2..1ab339c459c 100644
--- a/src/gallium/state_trackers/nine/swapchain9.c
+++ b/src/gallium/state_trackers/nine/swapchain9.c
@@ -202,7 +202,7 @@ NineSwapChain9_Resize( struct NineSwapChain9 *This,
pf = d3d9_to_pipe_format_checked(This->screen, pParams->BackBufferFormat,
PIPE_TEXTURE_2D, pParams->MultiSampleType,
- PIPE_BIND_RENDER_TARGET, FALSE);
+ PIPE_BIND_RENDER_TARGET, FALSE, FALSE);
if (This->actx->linear_framebuffer ||
(pf != PIPE_FORMAT_B8G8R8X8_UNORM &&
@@ -304,7 +304,7 @@ NineSwapChain9_Resize( struct NineSwapChain9 *This,
pParams->BackBufferFormat,
PIPE_TEXTURE_2D,
tmplt.nr_samples,
- tmplt.bind, FALSE);
+ tmplt.bind, FALSE, FALSE);
if (tmplt.format == PIPE_FORMAT_NONE)
return D3DERR_INVALIDCALL;
resource = This->screen->resource_create(This->screen, &tmplt);
@@ -360,7 +360,7 @@ NineSwapChain9_Resize( struct NineSwapChain9 *This,
PIPE_TEXTURE_2D,
tmplt.nr_samples,
tmplt.bind,
- FALSE);
+ FALSE, FALSE);
if (tmplt.format == PIPE_FORMAT_NONE) {
tmplt.bind &= ~PIPE_BIND_SAMPLER_VIEW;
tmplt.format = d3d9_to_pipe_format_checked(This->screen,
@@ -368,7 +368,7 @@ NineSwapChain9_Resize( struct NineSwapChain9 *This,
PIPE_TEXTURE_2D,
tmplt.nr_samples,
tmplt.bind,
- FALSE);
+ FALSE, FALSE);
}
if (tmplt.format == PIPE_FORMAT_NONE)
@@ -764,7 +764,7 @@ bypass_rendering:
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineSwapChain9_Present( struct NineSwapChain9 *This,
const RECT *pSourceRect,
const RECT *pDestRect,
@@ -858,7 +858,7 @@ NineSwapChain9_Present( struct NineSwapChain9 *This,
return hr;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineSwapChain9_GetFrontBufferData( struct NineSwapChain9 *This,
IDirect3DSurface9 *pDestSurface )
{
@@ -911,7 +911,7 @@ NineSwapChain9_GetFrontBufferData( struct NineSwapChain9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineSwapChain9_GetBackBuffer( struct NineSwapChain9 *This,
UINT iBackBuffer,
D3DBACKBUFFER_TYPE Type,
@@ -929,7 +929,7 @@ NineSwapChain9_GetBackBuffer( struct NineSwapChain9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineSwapChain9_GetRasterStatus( struct NineSwapChain9 *This,
D3DRASTER_STATUS *pRasterStatus )
{
@@ -939,7 +939,7 @@ NineSwapChain9_GetRasterStatus( struct NineSwapChain9 *This,
return ID3DPresent_GetRasterStatus(This->present, pRasterStatus);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineSwapChain9_GetDisplayMode( struct NineSwapChain9 *This,
D3DDISPLAYMODE *pMode )
{
@@ -961,7 +961,7 @@ NineSwapChain9_GetDisplayMode( struct NineSwapChain9 *This,
return hr;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineSwapChain9_GetPresentParameters( struct NineSwapChain9 *This,
D3DPRESENT_PARAMETERS *pPresentationParameters )
{
diff --git a/src/gallium/state_trackers/nine/swapchain9.h b/src/gallium/state_trackers/nine/swapchain9.h
index 4bd74f7b6ec..7714cd52cdd 100644
--- a/src/gallium/state_trackers/nine/swapchain9.h
+++ b/src/gallium/state_trackers/nine/swapchain9.h
@@ -109,7 +109,7 @@ NineSwapChain9_Resize( struct NineSwapChain9 *This,
D3DPRESENT_PARAMETERS *pParams,
D3DDISPLAYMODEEX *mode );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineSwapChain9_Present( struct NineSwapChain9 *This,
const RECT *pSourceRect,
const RECT *pDestRect,
@@ -117,25 +117,25 @@ NineSwapChain9_Present( struct NineSwapChain9 *This,
const RGNDATA *pDirtyRegion,
DWORD dwFlags );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineSwapChain9_GetFrontBufferData( struct NineSwapChain9 *This,
IDirect3DSurface9 *pDestSurface );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineSwapChain9_GetBackBuffer( struct NineSwapChain9 *This,
UINT iBackBuffer,
D3DBACKBUFFER_TYPE Type,
IDirect3DSurface9 **ppBackBuffer );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineSwapChain9_GetRasterStatus( struct NineSwapChain9 *This,
D3DRASTER_STATUS *pRasterStatus );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineSwapChain9_GetDisplayMode( struct NineSwapChain9 *This,
D3DDISPLAYMODE *pMode );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineSwapChain9_GetPresentParameters( struct NineSwapChain9 *This,
D3DPRESENT_PARAMETERS *pPresentationParameters );
diff --git a/src/gallium/state_trackers/nine/swapchain9ex.c b/src/gallium/state_trackers/nine/swapchain9ex.c
index bcf627f4c22..309402cbb7b 100644
--- a/src/gallium/state_trackers/nine/swapchain9ex.c
+++ b/src/gallium/state_trackers/nine/swapchain9ex.c
@@ -51,21 +51,21 @@ NineSwapChain9Ex_dtor( struct NineSwapChain9Ex *This )
NineSwapChain9_dtor(&This->base);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineSwapChain9Ex_GetLastPresentCount( struct NineSwapChain9Ex *This,
UINT *pLastPresentCount )
{
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineSwapChain9Ex_GetPresentStats( struct NineSwapChain9Ex *This,
D3DPRESENTSTATS *pPresentationStatistics )
{
STUB(D3DERR_INVALIDCALL);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineSwapChain9Ex_GetDisplayModeEx( struct NineSwapChain9Ex *This,
D3DDISPLAYMODEEX *pMode,
D3DDISPLAYROTATION *pRotation )
diff --git a/src/gallium/state_trackers/nine/swapchain9ex.h b/src/gallium/state_trackers/nine/swapchain9ex.h
index 075f8835222..a15d9d2c01e 100644
--- a/src/gallium/state_trackers/nine/swapchain9ex.h
+++ b/src/gallium/state_trackers/nine/swapchain9ex.h
@@ -45,15 +45,15 @@ NineSwapChain9Ex_new( struct NineDevice9 *pDevice,
D3DDISPLAYMODEEX *mode,
struct NineSwapChain9Ex **ppOut );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineSwapChain9Ex_GetLastPresentCount( struct NineSwapChain9Ex *This,
UINT *pLastPresentCount );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineSwapChain9Ex_GetPresentStats( struct NineSwapChain9Ex *This,
D3DPRESENTSTATS *pPresentationStatistics );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineSwapChain9Ex_GetDisplayModeEx( struct NineSwapChain9Ex *This,
D3DDISPLAYMODEEX *pMode,
D3DDISPLAYROTATION *pRotation );
diff --git a/src/gallium/state_trackers/nine/texture9.c b/src/gallium/state_trackers/nine/texture9.c
index ada08cea90a..3b88e07e976 100644
--- a/src/gallium/state_trackers/nine/texture9.c
+++ b/src/gallium/state_trackers/nine/texture9.c
@@ -48,7 +48,6 @@ NineTexture9_ctor( struct NineTexture9 *This,
{
struct pipe_screen *screen = pParams->device->screen;
struct pipe_resource *info = &This->base.base.info;
- struct pipe_resource *resource;
enum pipe_format pf;
unsigned *level_offsets;
unsigned l;
@@ -61,10 +60,23 @@ NineTexture9_ctor( struct NineTexture9 *This,
nine_D3DUSAGE_to_str(Usage),
d3dformat_to_string(Format), nine_D3DPOOL_to_str(Pool), pSharedHandle);
+ user_assert(Width && Height, D3DERR_INVALIDCALL);
+
+ /* pSharedHandle: can be non-null for ex only.
+ * D3DPOOL_SYSTEMMEM: Levels must be 1
+ * D3DPOOL_DEFAULT: no restriction for Levels
+ * Other Pools are forbidden. */
+ user_assert(!pSharedHandle || pParams->device->ex, D3DERR_INVALIDCALL);
+ user_assert(!pSharedHandle ||
+ (Pool == D3DPOOL_SYSTEMMEM && Levels == 1) ||
+ Pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL);
+
user_assert(!(Usage & D3DUSAGE_AUTOGENMIPMAP) ||
- (Pool != D3DPOOL_SYSTEMMEM && Levels <= 1), D3DERR_INVALIDCALL);
+ (Pool != D3DPOOL_SYSTEMMEM && Pool != D3DPOOL_SCRATCH && Levels <= 1),
+ D3DERR_INVALIDCALL);
- /* TODO: implement buffer sharing (should work with cross process too)
+ /* TODO: implement pSharedHandle for D3DPOOL_DEFAULT (cross process
+ * buffer sharing).
*
* Gem names may have fit but they're depreciated and won't work on render-nodes.
* One solution is to use shm buffers. We would use a /dev/shm file, fill the first
@@ -77,9 +89,6 @@ NineTexture9_ctor( struct NineTexture9 *This,
* invalid handle, that we would fail to import. Please note that we don't advertise
* the flag indicating the support for that feature, but apps seem to not care.
*/
- user_assert(!pSharedHandle ||
- Pool == D3DPOOL_SYSTEMMEM ||
- Pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL);
if (pSharedHandle && Pool == D3DPOOL_DEFAULT) {
if (!*pSharedHandle) {
@@ -97,7 +106,9 @@ NineTexture9_ctor( struct NineTexture9 *This,
Levels = 0;
pf = d3d9_to_pipe_format_checked(screen, Format, PIPE_TEXTURE_2D, 0,
- PIPE_BIND_SAMPLER_VIEW, FALSE);
+ PIPE_BIND_SAMPLER_VIEW, FALSE,
+ Pool == D3DPOOL_SCRATCH);
+
if (Format != D3DFMT_NULL && pf == PIPE_FORMAT_NONE)
return D3DERR_INVALIDCALL;
@@ -140,12 +151,6 @@ NineTexture9_ctor( struct NineTexture9 *This,
DBG("Application asked for Software Vertex Processing, "
"but this is unimplemented\n");
- if (pSharedHandle)
- info->bind |= PIPE_BIND_SHARED;
-
- if (Pool == D3DPOOL_SYSTEMMEM)
- info->usage = PIPE_USAGE_STAGING;
-
if (pSharedHandle && *pSharedHandle) { /* Pool == D3DPOOL_SYSTEMMEM */
user_buffer = (void *)*pSharedHandle;
level_offsets = alloca(sizeof(unsigned) * (info->last_level + 1));
@@ -188,11 +193,6 @@ NineTexture9_ctor( struct NineTexture9 *This,
sfdesc.MultiSampleType = D3DMULTISAMPLE_NONE;
sfdesc.MultiSampleQuality = 0;
- if (Pool == D3DPOOL_SYSTEMMEM)
- resource = NULL;
- else
- resource = This->base.base.resource;
-
for (l = 0; l <= info->last_level; ++l) {
sfdesc.Width = u_minify(Width, l);
sfdesc.Height = u_minify(Height, l);
@@ -202,7 +202,7 @@ NineTexture9_ctor( struct NineTexture9 *This,
level_offsets[l] : NULL;
hr = NineSurface9_new(This->base.base.base.device, NineUnknown(This),
- resource, user_buffer_for_level,
+ This->base.base.resource, user_buffer_for_level,
D3DRTYPE_TEXTURE, l, 0,
&sfdesc, &This->surfaces[l]);
if (FAILED(hr))
@@ -240,7 +240,7 @@ NineTexture9_dtor( struct NineTexture9 *This )
NineBaseTexture9_dtor(&This->base);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineTexture9_GetLevelDesc( struct NineTexture9 *This,
UINT Level,
D3DSURFACE_DESC *pDesc )
@@ -254,7 +254,7 @@ NineTexture9_GetLevelDesc( struct NineTexture9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineTexture9_GetSurfaceLevel( struct NineTexture9 *This,
UINT Level,
IDirect3DSurface9 **ppSurfaceLevel )
@@ -269,7 +269,7 @@ NineTexture9_GetSurfaceLevel( struct NineTexture9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineTexture9_LockRect( struct NineTexture9 *This,
UINT Level,
D3DLOCKED_RECT *pLockedRect,
@@ -287,7 +287,7 @@ NineTexture9_LockRect( struct NineTexture9 *This,
pRect, Flags);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineTexture9_UnlockRect( struct NineTexture9 *This,
UINT Level )
{
@@ -298,7 +298,7 @@ NineTexture9_UnlockRect( struct NineTexture9 *This,
return NineSurface9_UnlockRect(This->surfaces[Level]);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineTexture9_AddDirtyRect( struct NineTexture9 *This,
const RECT *pDirtyRect )
{
diff --git a/src/gallium/state_trackers/nine/texture9.h b/src/gallium/state_trackers/nine/texture9.h
index 6f80be9ccde..3911f26d468 100644
--- a/src/gallium/state_trackers/nine/texture9.h
+++ b/src/gallium/state_trackers/nine/texture9.h
@@ -48,28 +48,28 @@ NineTexture9_new( struct NineDevice9 *pDevice,
struct NineTexture9 **ppOut,
HANDLE *pSharedHandle );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineTexture9_GetLevelDesc( struct NineTexture9 *This,
UINT Level,
D3DSURFACE_DESC *pDesc );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineTexture9_GetSurfaceLevel( struct NineTexture9 *This,
UINT Level,
IDirect3DSurface9 **ppSurfaceLevel );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineTexture9_LockRect( struct NineTexture9 *This,
UINT Level,
D3DLOCKED_RECT *pLockedRect,
const RECT *pRect,
DWORD Flags );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineTexture9_UnlockRect( struct NineTexture9 *This,
UINT Level );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineTexture9_AddDirtyRect( struct NineTexture9 *This,
const RECT *pDirtyRect );
diff --git a/src/gallium/state_trackers/nine/vertexbuffer9.c b/src/gallium/state_trackers/nine/vertexbuffer9.c
index 10311b428fe..7c00b2d17e2 100644
--- a/src/gallium/state_trackers/nine/vertexbuffer9.c
+++ b/src/gallium/state_trackers/nine/vertexbuffer9.c
@@ -68,7 +68,7 @@ NineVertexBuffer9_GetResource( struct NineVertexBuffer9 *This )
return NineBuffer9_GetResource(&This->base);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVertexBuffer9_Lock( struct NineVertexBuffer9 *This,
UINT OffsetToLock,
UINT SizeToLock,
@@ -78,13 +78,13 @@ NineVertexBuffer9_Lock( struct NineVertexBuffer9 *This,
return NineBuffer9_Lock(&This->base, OffsetToLock, SizeToLock, ppbData, Flags);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVertexBuffer9_Unlock( struct NineVertexBuffer9 *This )
{
return NineBuffer9_Unlock(&This->base);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVertexBuffer9_GetDesc( struct NineVertexBuffer9 *This,
D3DVERTEXBUFFER_DESC *pDesc )
{
diff --git a/src/gallium/state_trackers/nine/vertexbuffer9.h b/src/gallium/state_trackers/nine/vertexbuffer9.h
index 859402b925b..6caa9bfe326 100644
--- a/src/gallium/state_trackers/nine/vertexbuffer9.h
+++ b/src/gallium/state_trackers/nine/vertexbuffer9.h
@@ -62,17 +62,17 @@ NineVertexBuffer9_GetResource( struct NineVertexBuffer9 *This );
/*** Direct3D public ***/
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVertexBuffer9_Lock( struct NineVertexBuffer9 *This,
UINT OffsetToLock,
UINT SizeToLock,
void **ppbData,
DWORD Flags );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVertexBuffer9_Unlock( struct NineVertexBuffer9 *This );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVertexBuffer9_GetDesc( struct NineVertexBuffer9 *This,
D3DVERTEXBUFFER_DESC *pDesc );
diff --git a/src/gallium/state_trackers/nine/vertexdeclaration9.c b/src/gallium/state_trackers/nine/vertexdeclaration9.c
index 36c594b5be3..e478c102b32 100644
--- a/src/gallium/state_trackers/nine/vertexdeclaration9.c
+++ b/src/gallium/state_trackers/nine/vertexdeclaration9.c
@@ -233,7 +233,7 @@ NineVertexDeclaration9_dtor( struct NineVertexDeclaration9 *This )
NineUnknown_dtor(&This->base);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVertexDeclaration9_GetDeclaration( struct NineVertexDeclaration9 *This,
D3DVERTEXELEMENT9 *pElement,
UINT *pNumElements )
diff --git a/src/gallium/state_trackers/nine/vertexdeclaration9.h b/src/gallium/state_trackers/nine/vertexdeclaration9.h
index e39f259440f..9d3b1bdca88 100644
--- a/src/gallium/state_trackers/nine/vertexdeclaration9.h
+++ b/src/gallium/state_trackers/nine/vertexdeclaration9.h
@@ -73,7 +73,7 @@ NineVertexDeclaration9_ctor( struct NineVertexDeclaration9 *This,
void
NineVertexDeclaration9_dtor( struct NineVertexDeclaration9 *This );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVertexDeclaration9_GetDeclaration( struct NineVertexDeclaration9 *This,
D3DVERTEXELEMENT9 *pElement,
UINT *pNumElements );
diff --git a/src/gallium/state_trackers/nine/vertexshader9.c b/src/gallium/state_trackers/nine/vertexshader9.c
index fdfb79a138e..7978ea8925b 100644
--- a/src/gallium/state_trackers/nine/vertexshader9.c
+++ b/src/gallium/state_trackers/nine/vertexshader9.c
@@ -123,7 +123,7 @@ NineVertexShader9_dtor( struct NineVertexShader9 *This )
NineUnknown_dtor(&This->base);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVertexShader9_GetFunction( struct NineVertexShader9 *This,
void *pData,
UINT *pSizeOfData )
diff --git a/src/gallium/state_trackers/nine/vertexshader9.h b/src/gallium/state_trackers/nine/vertexshader9.h
index 15c3f4ff041..c89d4a1ee24 100644
--- a/src/gallium/state_trackers/nine/vertexshader9.h
+++ b/src/gallium/state_trackers/nine/vertexshader9.h
@@ -107,7 +107,7 @@ NineVertexShader9_ctor( struct NineVertexShader9 *,
void
NineVertexShader9_dtor( struct NineVertexShader9 * );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVertexShader9_GetFunction( struct NineVertexShader9 *This,
void *pData,
UINT *pSizeOfData );
diff --git a/src/gallium/state_trackers/nine/volume9.c b/src/gallium/state_trackers/nine/volume9.c
index f6988923caa..7d54a7ac8cf 100644
--- a/src/gallium/state_trackers/nine/volume9.c
+++ b/src/gallium/state_trackers/nine/volume9.c
@@ -106,7 +106,8 @@ NineVolume9_ctor( struct NineVolume9 *This,
pDesc->Format,
This->info.target,
This->info.nr_samples,
- This->info.bind, FALSE);
+ This->info.bind, FALSE,
+ pDesc->Pool == D3DPOOL_SCRATCH);
if (This->info.format == PIPE_FORMAT_NONE)
return D3DERR_DRIVERINTERNALERROR;
@@ -116,9 +117,6 @@ NineVolume9_ctor( struct NineVolume9 *This,
This->layer_stride = util_format_get_2d_size(This->info.format,
This->stride, pDesc->Height);
- if (pDesc->Pool == D3DPOOL_SYSTEMMEM)
- This->info.usage = PIPE_USAGE_STAGING;
-
if (!This->resource) {
hr = NineVolume9_AllocateData(This);
if (FAILED(hr))
@@ -143,7 +141,7 @@ NineVolume9_dtor( struct NineVolume9 *This )
NineUnknown_dtor(&This->base);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVolume9_GetContainer( struct NineVolume9 *This,
REFIID riid,
void **ppContainer )
@@ -174,7 +172,7 @@ NineVolume9_MarkContainerDirty( struct NineVolume9 *This )
BASETEX_REGISTER_UPDATE(tex);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVolume9_GetDesc( struct NineVolume9 *This,
D3DVOLUME_DESC *pDesc )
{
@@ -214,7 +212,7 @@ NineVolume9_GetSystemMemPointer(struct NineVolume9 *This, int x, int y, int z)
return This->data + (z * This->layer_stride + y * This->stride + x_offset);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVolume9_LockBox( struct NineVolume9 *This,
D3DLOCKED_BOX *pLockedVolume,
const D3DBOX *pBox,
@@ -308,7 +306,7 @@ NineVolume9_LockBox( struct NineVolume9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVolume9_UnlockBox( struct NineVolume9 *This )
{
DBG("This=%p lock_count=%u\n", This, This->lock_count);
@@ -443,7 +441,7 @@ NineVolume9_new( struct NineDevice9 *pDevice,
/*** The boring stuff. TODO: Unify with Resource. ***/
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVolume9_SetPrivateData( struct NineVolume9 *This,
REFGUID refguid,
const void *pData,
@@ -490,7 +488,7 @@ NineVolume9_SetPrivateData( struct NineVolume9 *This,
return D3DERR_DRIVERINTERNALERROR;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVolume9_GetPrivateData( struct NineVolume9 *This,
REFGUID refguid,
void *pData,
@@ -517,7 +515,7 @@ NineVolume9_GetPrivateData( struct NineVolume9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVolume9_FreePrivateData( struct NineVolume9 *This,
REFGUID refguid )
{
diff --git a/src/gallium/state_trackers/nine/volume9.h b/src/gallium/state_trackers/nine/volume9.h
index 26ca8a32605..2eb19a5b9e8 100644
--- a/src/gallium/state_trackers/nine/volume9.h
+++ b/src/gallium/state_trackers/nine/volume9.h
@@ -96,39 +96,39 @@ NineVolume9_UploadSelf( struct NineVolume9 *This,
/*** Direct3D public ***/
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVolume9_SetPrivateData( struct NineVolume9 *This,
REFGUID refguid,
const void *pData,
DWORD SizeOfData,
DWORD Flags );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVolume9_GetPrivateData( struct NineVolume9 *This,
REFGUID refguid,
void *pData,
DWORD *pSizeOfData );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVolume9_FreePrivateData( struct NineVolume9 *This,
REFGUID refguid );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVolume9_GetContainer( struct NineVolume9 *This,
REFIID riid,
void **ppContainer );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVolume9_GetDesc( struct NineVolume9 *This,
D3DVOLUME_DESC *pDesc );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVolume9_LockBox( struct NineVolume9 *This,
D3DLOCKED_BOX *pLockedVolume,
const D3DBOX *pBox,
DWORD Flags );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVolume9_UnlockBox( struct NineVolume9 *This );
#endif /* _NINE_VOLUME9_H_ */
diff --git a/src/gallium/state_trackers/nine/volumetexture9.c b/src/gallium/state_trackers/nine/volumetexture9.c
index e5b2b53148d..e70f3c12337 100644
--- a/src/gallium/state_trackers/nine/volumetexture9.c
+++ b/src/gallium/state_trackers/nine/volumetexture9.c
@@ -48,19 +48,24 @@ NineVolumeTexture9_ctor( struct NineVolumeTexture9 *This,
This, pParams, Width, Height, Depth, Levels,
Usage, Format, Pool, pSharedHandle);
+ user_assert(Width && Height && Depth, D3DERR_INVALIDCALL);
+
+ /* user_assert(!pSharedHandle || Pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL); */
+ user_assert(!pSharedHandle, D3DERR_INVALIDCALL); /* TODO */
+
/* An IDirect3DVolume9 cannot be bound as a render target can it ? */
user_assert(!(Usage & (D3DUSAGE_RENDERTARGET | D3DUSAGE_DEPTHSTENCIL)),
D3DERR_INVALIDCALL);
user_assert(!(Usage & D3DUSAGE_AUTOGENMIPMAP), D3DERR_INVALIDCALL);
- user_assert(!pSharedHandle, D3DERR_INVALIDCALL); /* TODO */
-
pf = d3d9_to_pipe_format_checked(screen, Format, PIPE_TEXTURE_3D, 0,
- PIPE_BIND_SAMPLER_VIEW, FALSE);
+ PIPE_BIND_SAMPLER_VIEW, FALSE,
+ Pool == D3DPOOL_SCRATCH);
+
if (pf == PIPE_FORMAT_NONE)
return D3DERR_INVALIDCALL;
- /* We support ATI1 and ATI2 hacks only for 2D textures */
+ /* We support ATI1 and ATI2 hacks only for 2D and Cube textures */
if (Format == D3DFMT_ATI1 || Format == D3DFMT_ATI2)
return D3DERR_INVALIDCALL;
@@ -145,7 +150,7 @@ NineVolumeTexture9_dtor( struct NineVolumeTexture9 *This )
NineBaseTexture9_dtor(&This->base);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVolumeTexture9_GetLevelDesc( struct NineVolumeTexture9 *This,
UINT Level,
D3DVOLUME_DESC *pDesc )
@@ -157,7 +162,7 @@ NineVolumeTexture9_GetLevelDesc( struct NineVolumeTexture9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVolumeTexture9_GetVolumeLevel( struct NineVolumeTexture9 *This,
UINT Level,
IDirect3DVolume9 **ppVolumeLevel )
@@ -170,7 +175,7 @@ NineVolumeTexture9_GetVolumeLevel( struct NineVolumeTexture9 *This,
return D3D_OK;
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVolumeTexture9_LockBox( struct NineVolumeTexture9 *This,
UINT Level,
D3DLOCKED_BOX *pLockedVolume,
@@ -186,7 +191,7 @@ NineVolumeTexture9_LockBox( struct NineVolumeTexture9 *This,
Flags);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVolumeTexture9_UnlockBox( struct NineVolumeTexture9 *This,
UINT Level )
{
@@ -197,7 +202,7 @@ NineVolumeTexture9_UnlockBox( struct NineVolumeTexture9 *This,
return NineVolume9_UnlockBox(This->volumes[Level]);
}
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVolumeTexture9_AddDirtyBox( struct NineVolumeTexture9 *This,
const D3DBOX *pDirtyBox )
{
diff --git a/src/gallium/state_trackers/nine/volumetexture9.h b/src/gallium/state_trackers/nine/volumetexture9.h
index b8f250ad72e..dfe564cfc18 100644
--- a/src/gallium/state_trackers/nine/volumetexture9.h
+++ b/src/gallium/state_trackers/nine/volumetexture9.h
@@ -47,28 +47,28 @@ NineVolumeTexture9_new( struct NineDevice9 *pDevice,
struct NineVolumeTexture9 **ppOut,
HANDLE *pSharedHandle );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVolumeTexture9_GetLevelDesc( struct NineVolumeTexture9 *This,
UINT Level,
D3DVOLUME_DESC *pDesc );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVolumeTexture9_GetVolumeLevel( struct NineVolumeTexture9 *This,
UINT Level,
IDirect3DVolume9 **ppVolumeLevel );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVolumeTexture9_LockBox( struct NineVolumeTexture9 *This,
UINT Level,
D3DLOCKED_BOX *pLockedVolume,
const D3DBOX *pBox,
DWORD Flags );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVolumeTexture9_UnlockBox( struct NineVolumeTexture9 *This,
UINT Level );
-HRESULT WINAPI
+HRESULT NINE_WINAPI
NineVolumeTexture9_AddDirtyBox( struct NineVolumeTexture9 *This,
const D3DBOX *pDirtyBox );
diff --git a/src/gallium/tests/trivial/compute.c b/src/gallium/tests/trivial/compute.c
index 5ce12abe227..288cf2ad629 100644
--- a/src/gallium/tests/trivial/compute.c
+++ b/src/gallium/tests/trivial/compute.c
@@ -424,8 +424,17 @@ static void launch_grid(struct context *ctx, const uint *block_layout,
const void *input)
{
struct pipe_context *pipe = ctx->pipe;
+ /* Zero-initialise so that any member not assigned below is never handed
+  * to the driver as uninitialised stack contents.
+  */
+ struct pipe_grid_info info = {0};
+ int i;
+
+ /* Block and grid layouts are three-component (x, y, z) arrays. */
+ for (i = 0; i < 3; i++) {
+ info.block[i] = block_layout[i];
+ info.grid[i] = grid_layout[i];
+ }
+ info.pc = pc;
+ info.input = input;
- pipe->launch_grid(pipe, block_layout, grid_layout, pc, input);
+ pipe->launch_grid(pipe, &info);
}
static void test_default_init(void *p, int s, int x, int y)
diff --git a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
index c44424f4f4a..598ffcbd069 100644
--- a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
+++ b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
@@ -122,6 +122,7 @@ nouveau_drm_screen_create(int fd)
case 0xf0:
case 0x100:
case 0x110:
+ case 0x120:
init = nvc0_screen_create;
break;
default:
diff --git a/src/mapi/glapi/gen/EXT_texture_integer.xml b/src/mapi/glapi/gen/EXT_texture_integer.xml
index 5f9ea2963d3..eb6b220ea6b 100644
--- a/src/mapi/glapi/gen/EXT_texture_integer.xml
+++ b/src/mapi/glapi/gen/EXT_texture_integer.xml
@@ -69,25 +69,25 @@
<param name="a" type="GLuint"/>
</function>
- <function name="TexParameterIivEXT" alias="TexParameterIiv">
+ <function name="TexParameterIivEXT" alias="TexParameterIiv" es2="3.0">
<param name="target" type="GLenum"/>
<param name="pname" type="GLenum"/>
<param name="params" type="const GLint *"/>
</function>
- <function name="TexParameterIuivEXT" alias="TexParameterIuiv">
+ <function name="TexParameterIuivEXT" alias="TexParameterIuiv" es2="3.0">
<param name="target" type="GLenum"/>
<param name="pname" type="GLenum"/>
<param name="params" type="const GLuint *"/>
</function>
- <function name="GetTexParameterIivEXT" alias="GetTexParameterIiv">
+ <function name="GetTexParameterIivEXT" alias="GetTexParameterIiv" es2="3.0">
<param name="target" type="GLenum"/>
<param name="pname" type="GLenum"/>
<param name="params" type="GLint *"/>
</function>
- <function name="GetTexParameterIuivEXT" alias="GetTexParameterIuiv">
+ <function name="GetTexParameterIuivEXT" alias="GetTexParameterIuiv" es2="3.0">
<param name="target" type="GLenum"/>
<param name="pname" type="GLenum"/>
<param name="params" type="GLuint *"/>
diff --git a/src/mapi/glapi/gen/es_EXT.xml b/src/mapi/glapi/gen/es_EXT.xml
index 86df980304b..178f7c027bc 100644
--- a/src/mapi/glapi/gen/es_EXT.xml
+++ b/src/mapi/glapi/gen/es_EXT.xml
@@ -817,6 +817,36 @@
</function>
</category>
+<category name="GL_EXT_texture_border_clamp" number="182">
+
+ <!-- The *TexParameter* functions are added in EXT_texture_integer -->
+
+ <function name="SamplerParameterIivEXT" es2="3.0" alias="SamplerParameterIiv">
+ <param name="sampler" type="GLuint"/>
+ <param name="pname" type="GLenum"/>
+ <param name="params" type="const GLint *"/>
+ </function>
+
+ <function name="SamplerParameterIuivEXT" es2="3.0" alias="SamplerParameterIuiv">
+ <param name="sampler" type="GLuint"/>
+ <param name="pname" type="GLenum"/>
+ <param name="params" type="const GLuint *"/>
+ </function>
+
+ <function name="GetSamplerParameterIivEXT" es2="3.0" alias="GetSamplerParameterIiv">
+ <param name="sampler" type="GLuint"/>
+ <param name="pname" type="GLenum"/>
+ <param name="params" type="GLint *"/>
+ </function>
+
+ <!-- Unsigned-integer query: params is GLuint *, like GetTexParameterIuiv -->
+ <function name="GetSamplerParameterIuivEXT" es2="3.0" alias="GetSamplerParameterIuiv">
+ <param name="sampler" type="GLuint"/>
+ <param name="pname" type="GLenum"/>
+ <param name="params" type="GLuint *"/>
+ </function>
+
+</category>
+
<category name="GL_EXT_draw_elements_base_vertex" number="204">
<function name="DrawElementsBaseVertexEXT" alias="DrawElementsBaseVertex"
@@ -982,5 +1012,61 @@
<param name="texture" type="GLuint"/>
<param name="level" type="GLint"/>
</function>
- </category>
+</category>
+
+<category name="GL_OES_texture_border_clamp" number="215">
+
+ <enum name="TEXTURE_BORDER_COLOR_OES" value ="0x1004"/>
+ <enum name="CLAMP_TO_BORDER_OES" value="0x812D"/>
+
+ <function name="TexParameterIivOES" es2="3.0" alias="TexParameterIiv">
+ <param name="target" type="GLenum"/>
+ <param name="pname" type="GLenum"/>
+ <param name="params" type="const GLint *"/>
+ </function>
+
+ <function name="TexParameterIuivOES" es2="3.0" alias="TexParameterIuiv">
+ <param name="target" type="GLenum"/>
+ <param name="pname" type="GLenum"/>
+ <param name="params" type="const GLuint *"/>
+ </function>
+
+ <function name="GetTexParameterIivOES" es2="3.0" alias="GetTexParameterIiv">
+ <param name="target" type="GLenum"/>
+ <param name="pname" type="GLenum"/>
+ <param name="params" type="GLint *"/>
+ </function>
+
+ <function name="GetTexParameterIuivOES" es2="3.0" alias="GetTexParameterIuiv">
+ <param name="target" type="GLenum"/>
+ <param name="pname" type="GLenum"/>
+ <param name="params" type="GLuint *"/>
+ </function>
+
+ <function name="SamplerParameterIivOES" es2="3.0" alias="SamplerParameterIiv">
+ <param name="sampler" type="GLuint"/>
+ <param name="pname" type="GLenum"/>
+ <param name="params" type="const GLint *"/>
+ </function>
+
+ <function name="SamplerParameterIuivOES" es2="3.0" alias="SamplerParameterIuiv">
+ <param name="sampler" type="GLuint"/>
+ <param name="pname" type="GLenum"/>
+ <param name="params" type="const GLuint *"/>
+ </function>
+
+ <function name="GetSamplerParameterIivOES" es2="3.0" alias="GetSamplerParameterIiv">
+ <param name="sampler" type="GLuint"/>
+ <param name="pname" type="GLenum"/>
+ <param name="params" type="GLint *"/>
+ </function>
+
+ <!-- Unsigned-integer query: params is GLuint *, like GetTexParameterIuivOES -->
+ <function name="GetSamplerParameterIuivOES" es2="3.0" alias="GetSamplerParameterIuiv">
+ <param name="sampler" type="GLuint"/>
+ <param name="pname" type="GLenum"/>
+ <param name="params" type="GLuint *"/>
+ </function>
+
+</category>
+
</OpenGLAPI>
diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index 6669f295399..a6c12c64828 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -405,6 +405,7 @@ STATETRACKER_FILES = \
state_tracker/st_atom_depth.c \
state_tracker/st_atom_framebuffer.c \
state_tracker/st_atom.h \
+ state_tracker/st_atom_image.c \
state_tracker/st_atom_msaa.c \
state_tracker/st_atom_pixeltransfer.c \
state_tracker/st_atom_rasterizer.c \
@@ -427,6 +428,8 @@ STATETRACKER_FILES = \
state_tracker/st_cb_bufferobjects.h \
state_tracker/st_cb_clear.c \
state_tracker/st_cb_clear.h \
+ state_tracker/st_cb_compute.c \
+ state_tracker/st_cb_compute.h \
state_tracker/st_cb_condrender.c \
state_tracker/st_cb_condrender.h \
state_tracker/st_cb_copyimage.c \
diff --git a/src/mesa/drivers/common/meta_copy_image.c b/src/mesa/drivers/common/meta_copy_image.c
index 6534d432e61..57c3f686b0c 100644
--- a/src/mesa/drivers/common/meta_copy_image.c
+++ b/src/mesa/drivers/common/meta_copy_image.c
@@ -203,7 +203,7 @@ _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx,
src_tex_image = wrap_renderbuffer(ctx, src_renderbuffer);
}
if (!make_view(ctx, src_tex_image, &src_view_tex_image, &src_view_texture,
- dst_tex_image->InternalFormat))
+ dst_internal_format))
goto cleanup;
}
diff --git a/src/mesa/drivers/dri/i915/intel_tex_validate.c b/src/mesa/drivers/dri/i915/intel_tex_validate.c
index 224a67fc6eb..c2111e5e209 100644
--- a/src/mesa/drivers/dri/i915/intel_tex_validate.c
+++ b/src/mesa/drivers/dri/i915/intel_tex_validate.c
@@ -1,6 +1,7 @@
#include "main/mtypes.h"
#include "main/macros.h"
#include "main/samplerobj.h"
+#include "main/teximage.h"
#include "main/texobj.h"
#include "intel_context.h"
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 45f7a91295d..8f92fd7cfd2 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -175,6 +175,7 @@ i965_FILES = \
gen6_blorp.cpp \
gen6_cc.c \
gen6_clip_state.c \
+ gen6_constant_state.c \
gen6_depth_state.c \
gen6_depthstencil.c \
gen6_gs_state.c \
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index c7cb3944870..05fff91ed57 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -70,8 +70,13 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
* the destination buffer because we use the standard render path to render
* to destination color buffers, and the standard render path is
* fast-color-aware.
+ * Lossless compression is only introduced for gen9 onwards whereas
+ * blorp is not supported even for gen8. Therefore it should be impossible
+ * to end up here with single sampled compressed surfaces.
*/
- intel_miptree_resolve_color(brw, src_mt);
+ assert(!intel_miptree_is_lossless_compressed(brw, src_mt));
+ assert(!intel_miptree_is_lossless_compressed(brw, dst_mt));
+ intel_miptree_resolve_color(brw, src_mt, 0);
intel_miptree_slice_resolve_depth(brw, src_mt, src_level, src_layer);
intel_miptree_slice_resolve_depth(brw, dst_mt, dst_level, dst_layer);
diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c
index d9f181a1cbf..0d2aca5b78a 100644
--- a/src/mesa/drivers/dri/i965/brw_compute.c
+++ b/src/mesa/drivers/dri/i965/brw_compute.c
@@ -35,6 +35,84 @@
+/**
+ * Program the registers needed for an indirect compute dispatch.
+ *
+ * The three consecutive dwords at brw->compute.num_work_groups_offset inside
+ * brw->compute.num_work_groups_bo are loaded into the
+ * GEN7_GPGPU_DISPATCHDIMX/Y/Z registers.  When brw->gen is 7 or lower, the
+ * MI predicate is additionally programmed so that it ends up set only when
+ * none of the three dimensions is zero.
+ */
static void
+prepare_indirect_gpgpu_walker(struct brw_context *brw)
+{
+ GLintptr indirect_offset = brw->compute.num_work_groups_offset;
+ drm_intel_bo *bo = brw->compute.num_work_groups_bo;
+
+ /* x, y and z group counts are read from byte offsets +0, +4 and +8. */
+ brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMX, bo,
+ I915_GEM_DOMAIN_VERTEX, 0,
+ indirect_offset + 0);
+ brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMY, bo,
+ I915_GEM_DOMAIN_VERTEX, 0,
+ indirect_offset + 4);
+ brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMZ, bo,
+ I915_GEM_DOMAIN_VERTEX, 0,
+ indirect_offset + 8);
+
+ /* The predicate built below is skipped for anything newer than gen7; the
+  * caller in this change only sets GEN7_GPGPU_PREDICATE_ENABLE when
+  * brw->gen == 7.
+  */
+ if (brw->gen > 7)
+ return;
+
+ /* Clear upper 32-bits of SRC0 and all 64-bits of SRC1 */
+ /* Seven dwords: the MI_LOAD_REGISTER_IMM header followed by three
+  * (register, value) pairs.
+  */
+ BEGIN_BATCH(7);
+ OUT_BATCH(MI_LOAD_REGISTER_IMM | (7 - 2));
+ OUT_BATCH(MI_PREDICATE_SRC0 + 4);
+ OUT_BATCH(0u);
+ OUT_BATCH(MI_PREDICATE_SRC1 + 0);
+ OUT_BATCH(0u);
+ OUT_BATCH(MI_PREDICATE_SRC1 + 4);
+ OUT_BATCH(0u);
+ ADVANCE_BATCH();
+
+ /* Load compute_dispatch_indirect_x_size into SRC0 */
+ brw_load_register_mem(brw, MI_PREDICATE_SRC0, bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ indirect_offset + 0);
+
+ /* predicate = (compute_dispatch_indirect_x_size == 0); */
+ /* SRC1 was zeroed above, so SRCS_EQUAL compares the loaded size with 0. */
+ BEGIN_BATCH(1);
+ OUT_BATCH(GEN7_MI_PREDICATE |
+ MI_PREDICATE_LOADOP_LOAD |
+ MI_PREDICATE_COMBINEOP_SET |
+ MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
+ ADVANCE_BATCH();
+
+ /* Load compute_dispatch_indirect_y_size into SRC0 */
+ brw_load_register_mem(brw, MI_PREDICATE_SRC0, bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ indirect_offset + 4);
+
+ /* predicate |= (compute_dispatch_indirect_y_size == 0); */
+ BEGIN_BATCH(1);
+ OUT_BATCH(GEN7_MI_PREDICATE |
+ MI_PREDICATE_LOADOP_LOAD |
+ MI_PREDICATE_COMBINEOP_OR |
+ MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
+ ADVANCE_BATCH();
+
+ /* Load compute_dispatch_indirect_z_size into SRC0 */
+ brw_load_register_mem(brw, MI_PREDICATE_SRC0, bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ indirect_offset + 8);
+
+ /* predicate |= (compute_dispatch_indirect_z_size == 0); */
+ BEGIN_BATCH(1);
+ OUT_BATCH(GEN7_MI_PREDICATE |
+ MI_PREDICATE_LOADOP_LOAD |
+ MI_PREDICATE_COMBINEOP_OR |
+ MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
+ ADVANCE_BATCH();
+
+ /* predicate = !predicate; */
+ /* After the inversion the predicate is set only if x, y and z were all
+  * non-zero.
+  */
+ BEGIN_BATCH(1);
+ OUT_BATCH(GEN7_MI_PREDICATE |
+ MI_PREDICATE_LOADOP_LOADINV |
+ MI_PREDICATE_COMBINEOP_OR |
+ MI_PREDICATE_COMPAREOP_FALSE);
+ ADVANCE_BATCH();
+}
+
+static void
brw_emit_gpgpu_walker(struct brw_context *brw)
{
const struct brw_cs_prog_data *prog_data = brw->cs.prog_data;
@@ -45,20 +123,10 @@ brw_emit_gpgpu_walker(struct brw_context *brw)
if (brw->compute.num_work_groups_bo == NULL) {
indirect_flag = 0;
} else {
- GLintptr indirect_offset = brw->compute.num_work_groups_offset;
- drm_intel_bo *bo = brw->compute.num_work_groups_bo;
-
- indirect_flag = GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE;
-
- brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMX, bo,
- I915_GEM_DOMAIN_VERTEX, 0,
- indirect_offset + 0);
- brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMY, bo,
- I915_GEM_DOMAIN_VERTEX, 0,
- indirect_offset + 4);
- brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMZ, bo,
- I915_GEM_DOMAIN_VERTEX, 0,
- indirect_offset + 8);
+ indirect_flag =
+ GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE |
+ (brw->gen == 7 ? GEN7_GPGPU_PREDICATE_ENABLE : 0);
+ prepare_indirect_gpgpu_walker(brw);
}
const unsigned simd_size = prog_data->simd_size;
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 44d2fe4d9e4..31b6b2a3641 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -208,7 +208,11 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
if (!tex_obj || !tex_obj->mt)
continue;
intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
- intel_miptree_resolve_color(brw, tex_obj->mt);
+ /* Sampling engine understands lossless compression and resolving
+ * those surfaces should be skipped for performance reasons.
+ */
+ intel_miptree_resolve_color(brw, tex_obj->mt,
+ INTEL_MIPTREE_IGNORE_CCS_E);
brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
}
@@ -223,7 +227,13 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
tex_obj = intel_texture_object(u->TexObj);
if (tex_obj && tex_obj->mt) {
- intel_miptree_resolve_color(brw, tex_obj->mt);
+ /* Access to images is implemented using indirect messages
+ * against data port. Normal render target write understands
+ * lossless compression but unfortunately the typed/untyped
+ * read/write interface doesn't. Therefore the compressed
+ * surfaces need to be resolved prior to accessing them.
+ */
+ intel_miptree_resolve_color(brw, tex_obj->mt, 0);
brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
}
}
@@ -252,7 +262,11 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
_mesa_get_srgb_format_linear(mt->format) == mt->format)
continue;
- intel_miptree_resolve_color(brw, mt);
+ /* Lossless compression is not supported for SRGB formats, it
+ * should be impossible to get here with such surfaces.
+ */
+ assert(!intel_miptree_is_lossless_compressed(brw, mt));
+ intel_miptree_resolve_color(brw, mt, 0);
brw_render_cache_set_check_flush(brw, mt->bo);
}
}
@@ -710,6 +724,7 @@ brw_initialize_cs_context_constants(struct brw_context *brw, unsigned max_thread
ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
+ ctx->Const.MaxComputeSharedMemorySize = 64 * 1024;
}
/**
@@ -1227,7 +1242,7 @@ intel_resolve_for_dri2_flush(struct brw_context *brw,
if (rb == NULL || rb->mt == NULL)
continue;
if (rb->mt->num_samples <= 1)
- intel_miptree_resolve_color(brw, rb->mt);
+ intel_miptree_resolve_color(brw, rb->mt, 0);
else
intel_renderbuffer_downsample(brw, rb);
}
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 8416a1d4708..8ef5afea149 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -2729,6 +2729,7 @@ enum brw_wm_barycentric_interp_mode {
# define GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE (1 << 8)
# define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE (1 << 7)
# define GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE (1 << 6)
+# define GEN9_PS_RENDER_TARGET_RESOLVE_FULL (3 << 6)
# define HSW_PS_UAV_ACCESS_ENABLE (1 << 5)
# define GEN7_PS_POSOFFSET_NONE (0 << 3)
# define GEN7_PS_POSOFFSET_CENTROID (2 << 3)
@@ -2939,6 +2940,7 @@ enum brw_wm_barycentric_interp_mode {
#define GPGPU_WALKER 0x7105
/* GEN7 DW0 */
# define GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE (1 << 10)
+# define GEN7_GPGPU_PREDICATE_ENABLE (1 << 8)
/* GEN8+ DW2 */
# define GPGPU_WALKER_INDIRECT_LENGTH_SHIFT 0
# define GPGPU_WALKER_INDIRECT_LENGTH_MASK INTEL_MASK(15, 0)
diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c b/src/mesa/drivers/dri/i965/brw_device_info.c
index 05872255865..38a27da898c 100644
--- a/src/mesa/drivers/dri/i965/brw_device_info.c
+++ b/src/mesa/drivers/dri/i965/brw_device_info.c
@@ -384,20 +384,19 @@ static const struct brw_device_info brw_device_info_bxt = {
.gt = 1,
.has_llc = false,
- /* XXX: These are preliminary thread counts and URB sizes. */
.num_slices = 1,
- .max_vs_threads = 56,
- .max_hs_threads = 56,
- .max_ds_threads = 56,
- .max_gs_threads = 56,
+ .max_vs_threads = 112,
+ .max_hs_threads = 112,
+ .max_ds_threads = 112,
+ .max_gs_threads = 112,
.max_wm_threads = 64 * 3,
- .max_cs_threads = 28,
+ .max_cs_threads = 6 * 6,
.urb = {
- .size = 64,
+ .size = 192,
.min_vs_entries = 34,
- .max_vs_entries = 640,
- .max_hs_entries = 80,
- .max_ds_entries = 80,
+ .max_vs_entries = 704,
+ .max_hs_entries = 256,
+ .max_ds_entries = 416,
.max_gs_entries = 256,
}
};
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 23e71fd9ba6..afa8a4e9eae 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -391,6 +391,15 @@ brw_try_draw_prims(struct gl_context *ctx,
if (ctx->NewState)
_mesa_update_state(ctx);
+ /* We have to validate the textures *before* checking for fallbacks;
+ * otherwise, the software fallback won't be able to rely on the
+ * texture state, the firstLevel and lastLevel fields won't be
+ * set in the intel texture object (they'll both be 0), and the
+ * software fallback will segfault if it attempts to access any
+ * texture level other than level 0.
+ */
+ brw_validate_textures(brw);
+
/* Find the highest sampler unit used by each shader program. A bit-count
* won't work since ARB programs use the texture unit number as the sampler
* index.
@@ -406,15 +415,6 @@ brw_try_draw_prims(struct gl_context *ctx,
brw->vs.base.sampler_count =
_mesa_fls(ctx->VertexProgram._Current->Base.SamplersUsed);
- /* We have to validate the textures *before* checking for fallbacks;
- * otherwise, the software fallback won't be able to rely on the
- * texture state, the firstLevel and lastLevel fields won't be
- * set in the intel texture object (they'll both be 0), and the
- * software fallback will segfault if it attempts to access any
- * texture level other than level 0.
- */
- brw_validate_textures(brw);
-
intel_prepare_render(brw);
/* This workaround has to happen outside of brw_upload_render_state()
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index bb22cfa5fab..81a83400ea0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3524,6 +3524,36 @@ fs_visitor::lower_integer_multiplication()
return progress;
}
+/**
+ * Lower unpredicated SEL instructions for hardware older than gen6.
+ *
+ * For every SEL whose predicate is BRW_PREDICATE_NONE, a CMP of the SEL's
+ * two sources is emitted through a builder positioned at that SEL, using the
+ * SEL's conditional modifier and the null register as destination.  The SEL
+ * itself is then turned into a normally-predicated SEL without a conditional
+ * modifier.
+ *
+ * Returns true if at least one instruction was rewritten; in that case the
+ * live intervals are invalidated as well.
+ */
+bool
+fs_visitor::lower_minmax()
+{
+ assert(devinfo->gen < 6);
+
+ bool progress = false;
+
+ foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
+ const fs_builder ibld(this, block, inst);
+
+ /* NOTE(review): every unpredicated SEL is matched, so this presumably
+  * relies on such SELs always carrying a min/max conditional modifier
+  * (as emit_minmax() produces) -- confirm.
+  */
+ if (inst->opcode == BRW_OPCODE_SEL &&
+ inst->predicate == BRW_PREDICATE_NONE) {
+ /* FIXME: Using CMP doesn't preserve the NaN propagation semantics of
+ * the original SEL.L/GE instruction
+ */
+ ibld.CMP(ibld.null_reg_d(), inst->src[0], inst->src[1],
+ inst->conditional_mod);
+ /* The SEL now selects on the flag written by the CMP above. */
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ inst->conditional_mod = BRW_CONDITIONAL_NONE;
+
+ progress = true;
+ }
+ }
+
+ /* New instructions were inserted, so cached liveness is stale. */
+ if (progress)
+ invalidate_live_intervals();
+
+ return progress;
+}
+
static void
setup_color_payload(const fs_builder &bld, const brw_wm_prog_key *key,
fs_reg *dst, fs_reg color, unsigned components)
@@ -5134,7 +5164,7 @@ fs_visitor::optimize()
if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) {
char filename[64];
- snprintf(filename, 64, "%s%d-%s-00-start",
+ snprintf(filename, 64, "%s%d-%s-00-00-start",
stage_abbrev, dispatch_width, nir->info.name);
backend_shader::dump_instructions(filename);
@@ -5187,6 +5217,13 @@ fs_visitor::optimize()
OPT(opt_combine_constants);
OPT(lower_integer_multiplication);
+ if (devinfo->gen <= 5 && OPT(lower_minmax)) {
+ OPT(opt_cmod_propagation);
+ OPT(opt_cse);
+ OPT(opt_copy_propagate);
+ OPT(dead_code_eliminate);
+ }
+
lower_uniform_pull_constant_loads();
validate();
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 89fbbfc90e4..f1a81c13ef9 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -175,6 +175,7 @@ public:
bool lower_load_payload();
bool lower_logical_sends();
bool lower_integer_multiplication();
+ bool lower_minmax();
bool lower_simd_width();
bool opt_combine_constants();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h
index dd3c383a17d..9a3cc3a50c2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h
@@ -369,20 +369,14 @@ namespace brw {
*
* Generally useful to get the minimum or maximum of two values.
*/
- void
+ instruction *
emit_minmax(const dst_reg &dst, const src_reg &src0,
const src_reg &src1, brw_conditional_mod mod) const
{
assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L);
- if (shader->devinfo->gen >= 6) {
- set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
- fix_unsigned_negate(src1)));
- } else {
- CMP(null_reg_d(), src0, src1, mod);
- set_predicate(BRW_PREDICATE_NORMAL,
- SEL(dst, src0, src1));
- }
+ return set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
+ fix_unsigned_negate(src1)));
}
/**
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index fd25307c587..9dbe13df514 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -87,7 +87,7 @@ public:
void setup_initial_values();
void run();
- void dump_block_data() const;
+ void dump_block_data() const UNUSED;
void *mem_ctx;
cfg_t *cfg;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 956cf53fd6c..8611b8dc443 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1015,28 +1015,14 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
case nir_op_fmin:
case nir_op_imin:
case nir_op_umin:
- if (devinfo->gen >= 6) {
- inst = bld.emit(BRW_OPCODE_SEL, result, op[0], op[1]);
- inst->conditional_mod = BRW_CONDITIONAL_L;
- } else {
- bld.CMP(bld.null_reg_d(), op[0], op[1], BRW_CONDITIONAL_L);
- inst = bld.SEL(result, op[0], op[1]);
- inst->predicate = BRW_PREDICATE_NORMAL;
- }
+ inst = bld.emit_minmax(result, op[0], op[1], BRW_CONDITIONAL_L);
inst->saturate = instr->dest.saturate;
break;
case nir_op_fmax:
case nir_op_imax:
case nir_op_umax:
- if (devinfo->gen >= 6) {
- inst = bld.emit(BRW_OPCODE_SEL, result, op[0], op[1]);
- inst->conditional_mod = BRW_CONDITIONAL_GE;
- } else {
- bld.CMP(bld.null_reg_d(), op[0], op[1], BRW_CONDITIONAL_GE);
- inst = bld.SEL(result, op[0], op[1]);
- inst->predicate = BRW_PREDICATE_NORMAL;
- }
+ inst = bld.emit_minmax(result, op[0], op[1], BRW_CONDITIONAL_GE);
inst->saturate = instr->dest.saturate;
break;
@@ -1318,7 +1304,7 @@ fs_visitor::emit_percomp(const fs_builder &bld, const fs_inst &inst,
static brw_reg_type
get_image_base_type(const glsl_type *type)
{
- switch ((glsl_base_type)type->sampler_type) {
+ switch ((glsl_base_type)type->sampled_type) {
case GLSL_TYPE_UINT:
return BRW_REGISTER_TYPE_UD;
case GLSL_TYPE_INT:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
index 9b0750026c4..4adffdd75fb 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
@@ -1143,7 +1143,7 @@ namespace brw {
if (rsize && pred)
set_predicate(pred, bld.SEL(tmp, tmp, brw_imm_d(0)));
- return tmp;
+ return retype(tmp, src0.type);
}
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
index b2b07e7e58e..488fa6c0c45 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
@@ -228,7 +228,9 @@ get_fast_clear_rect(struct brw_context *brw, struct gl_framebuffer *fb,
unsigned int x_align, y_align;
unsigned int x_scaledown, y_scaledown;
- if (irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE) {
+ /* Only single sampled surfaces need to (and actually can) be resolved. */
+ if (irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE ||
+ intel_miptree_is_lossless_compressed(brw, irb->mt)) {
/* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
* Target(s)", beneath the "Fast Color Clear" bullet (p327):
*
@@ -872,7 +874,10 @@ brw_meta_resolve_color(struct brw_context *brw,
* bits to let us select the type of resolve. For fast clear resolves, it
* turns out we can use the same value as pre-SKL though.
*/
- set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE);
+ if (intel_miptree_is_lossless_compressed(brw, mt))
+ set_fast_clear_op(brw, GEN9_PS_RENDER_TARGET_RESOLVE_FULL);
+ else
+ set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE);
mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
get_resolve_rect(brw, mt, &rect);
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index f44ccd6e071..6b85eac77d6 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -76,6 +76,7 @@ extern const struct brw_tracked_state brw_tcs_samplers;
extern const struct brw_tracked_state brw_tes_samplers;
extern const struct brw_tracked_state brw_gs_samplers;
extern const struct brw_tracked_state brw_cs_samplers;
+extern const struct brw_tracked_state brw_cs_texture_surfaces;
extern const struct brw_tracked_state brw_vs_ubo_surfaces;
extern const struct brw_tracked_state brw_vs_abo_surfaces;
extern const struct brw_tracked_state brw_vs_image_surfaces;
diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c
index cac06fa8918..c6aa1344270 100644
--- a/src/mesa/drivers/dri/i965/brw_state_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
@@ -387,8 +387,27 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
/* We need to make sure that the programs get regenerated, since
* any offsets leftover in brw_context will no longer be valid.
*/
- brw->NewGLState |= ~0;
- brw->ctx.NewDriverState |= ~0ull;
+ brw->NewGLState = ~0;
+ brw->ctx.NewDriverState = ~0ull;
+ brw->state.pipelines[BRW_RENDER_PIPELINE].mesa = ~0;
+ brw->state.pipelines[BRW_RENDER_PIPELINE].brw = ~0ull;
+ brw->state.pipelines[BRW_COMPUTE_PIPELINE].mesa = ~0;
+ brw->state.pipelines[BRW_COMPUTE_PIPELINE].brw = ~0ull;
+
+ /* Also, NULL out any stale program pointers. */
+ brw->vs.prog_data = NULL;
+ brw->vs.base.prog_data = NULL;
+ brw->tcs.prog_data = NULL;
+ brw->tcs.base.prog_data = NULL;
+ brw->tes.prog_data = NULL;
+ brw->tes.base.prog_data = NULL;
+ brw->gs.prog_data = NULL;
+ brw->gs.base.prog_data = NULL;
+ brw->wm.prog_data = NULL;
+ brw->wm.base.prog_data = NULL;
+ brw->cs.prog_data = NULL;
+ brw->cs.base.prog_data = NULL;
+
intel_batchbuffer_flush(brw);
}
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index ee75ca88549..a91d07446ce 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -280,7 +280,7 @@ static const struct brw_tracked_state *gen7_compute_atoms[] =
&brw_cs_pull_constants,
&brw_cs_ubo_surfaces,
&brw_cs_abo_surfaces,
- &brw_texture_surfaces,
+ &brw_cs_texture_surfaces,
&brw_cs_work_groups_surface,
&brw_cs_samplers,
&brw_cs_state,
@@ -395,7 +395,7 @@ static const struct brw_tracked_state *gen8_compute_atoms[] =
&brw_cs_pull_constants,
&brw_cs_ubo_surfaces,
&brw_cs_abo_surfaces,
- &brw_texture_surfaces,
+ &brw_cs_texture_surfaces,
&brw_cs_work_groups_surface,
&brw_cs_samplers,
&brw_cs_state,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 2e79de6a883..8136339332b 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -26,6 +26,7 @@
#include "brw_cfg.h"
#include "brw_vs.h"
#include "brw_nir.h"
+#include "brw_vec4_builder.h"
#include "brw_vec4_live_variables.h"
#include "brw_dead_control_flow.h"
#include "program/prog_parameter.h"
@@ -1555,11 +1556,6 @@ vec4_vs_visitor::setup_attributes(int payload_reg)
}
}
- if (vs_prog_data->uses_drawid) {
- attribute_map[VERT_ATTRIB_MAX + 1] = payload_reg + nr_attributes;
- nr_attributes++;
- }
-
/* VertexID is stored by the VF as the last vertex element, but we
* don't represent it with a flag in inputs_read, so we call it
* VERT_ATTRIB_MAX.
@@ -1570,6 +1566,11 @@ vec4_vs_visitor::setup_attributes(int payload_reg)
nr_attributes++;
}
+ if (vs_prog_data->uses_drawid) {
+ attribute_map[VERT_ATTRIB_MAX + 1] = payload_reg + nr_attributes;
+ nr_attributes++;
+ }
+
lower_attributes_to_hw_regs(attribute_map, false /* interleaved */);
return payload_reg + vs_prog_data->nr_attributes;
@@ -1624,6 +1625,36 @@ vec4_vs_visitor::setup_payload(void)
this->first_non_payload_grf = reg;
}
+bool
+vec4_visitor::lower_minmax()
+{
+ assert(devinfo->gen < 6);
+
+ bool progress = false;
+
+ foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
+ const vec4_builder ibld(this, block, inst);
+
+ if (inst->opcode == BRW_OPCODE_SEL &&
+ inst->predicate == BRW_PREDICATE_NONE) {
+ /* FIXME: Using CMP doesn't preserve the NaN propagation semantics of
+ * the original SEL.L/GE instruction
+ */
+ ibld.CMP(ibld.null_reg_d(), inst->src[0], inst->src[1],
+ inst->conditional_mod);
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ inst->conditional_mod = BRW_CONDITIONAL_NONE;
+
+ progress = true;
+ }
+ }
+
+ if (progress)
+ invalidate_live_intervals();
+
+ return progress;
+}
+
src_reg
vec4_visitor::get_timestamp()
{
@@ -1861,7 +1892,7 @@ vec4_visitor::run()
if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) {
char filename[64];
- snprintf(filename, 64, "%s-%s-00-start",
+ snprintf(filename, 64, "%s-%s-00-00-start",
stage_abbrev, nir->info.name);
backend_shader::dump_instructions(filename);
@@ -1896,6 +1927,13 @@ vec4_visitor::run()
OPT(dead_code_eliminate);
}
+ if (devinfo->gen <= 5 && OPT(lower_minmax)) {
+ OPT(opt_cmod_propagation);
+ OPT(opt_cse);
+ OPT(opt_copy_propagation);
+ OPT(dead_code_eliminate);
+ }
+
if (failed)
return false;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 6d388b04fdb..7b86e1bc050 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -304,6 +304,8 @@ public:
void resolve_ud_negate(src_reg *reg);
+ bool lower_minmax();
+
src_reg get_timestamp();
void dump_instruction(backend_instruction *inst);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_builder.h b/src/mesa/drivers/dri/i965/brw_vec4_builder.h
index be1427c7db7..3a8617e05ac 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4_builder.h
@@ -66,7 +66,8 @@ namespace brw {
* instruction passed as argument.
*/
vec4_builder(backend_shader *shader, bblock_t *block, instruction *inst) :
- shader(shader), block(block), cursor(inst)
+ shader(shader), block(block), cursor(inst),
+ force_writemask_all(inst->force_writemask_all)
{
annotation.str = inst->annotation;
annotation.ir = inst->ir;
@@ -298,18 +299,14 @@ namespace brw {
*
* Generally useful to get the minimum or maximum of two values.
*/
- void
+ instruction *
emit_minmax(const dst_reg &dst, const src_reg &src0,
const src_reg &src1, brw_conditional_mod mod) const
{
- if (shader->devinfo->gen >= 6) {
- set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
- fix_unsigned_negate(src1)));
- } else {
- CMP(null_reg_d(), src0, src1, mod);
- set_predicate(BRW_PREDICATE_NORMAL,
- SEL(dst, src0, src1));
- }
+ assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L);
+
+ return set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
+ fix_unsigned_negate(src1)));
}
/**
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 3bb768f82dc..8418a3cdc01 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -679,18 +679,8 @@ vec4_instruction *
vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
src_reg src0, src_reg src1)
{
- vec4_instruction *inst;
-
- if (devinfo->gen >= 6) {
- inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
- inst->conditional_mod = conditionalmod;
- } else {
- emit(CMP(dst, src0, src1, conditionalmod));
-
- inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
- inst->predicate = BRW_PREDICATE_NORMAL;
- }
-
+ vec4_instruction *inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
+ inst->conditional_mod = conditionalmod;
return inst;
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 5ab2f7f09df..f1b8fc4c84d 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -872,16 +872,12 @@ brw_update_texture_surfaces(struct brw_context *brw)
/* BRW_NEW_FRAGMENT_PROGRAM */
struct gl_program *fs = (struct gl_program *) brw->fragment_program;
- /* BRW_NEW_COMPUTE_PROGRAM */
- struct gl_program *cs = (struct gl_program *) brw->compute_program;
-
/* _NEW_TEXTURE */
update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false);
update_stage_texture_surfaces(brw, tes, &brw->tes.base, false);
update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
- update_stage_texture_surfaces(brw, cs, &brw->cs.base, false);
/* emit alternate set of surface state for gather. this
* allows the surface format to be overriden for only the
@@ -897,8 +893,6 @@ brw_update_texture_surfaces(struct brw_context *brw)
update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
if (fs && fs->UsesGather)
update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
- if (cs && cs->UsesGather)
- update_stage_texture_surfaces(brw, cs, &brw->cs.base, true);
}
brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
@@ -908,7 +902,6 @@ const struct brw_tracked_state brw_texture_surfaces = {
.dirty = {
.mesa = _NEW_TEXTURE,
.brw = BRW_NEW_BATCH |
- BRW_NEW_COMPUTE_PROGRAM |
BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_FS_PROG_DATA |
BRW_NEW_GEOMETRY_PROGRAM |
@@ -923,6 +916,37 @@ const struct brw_tracked_state brw_texture_surfaces = {
.emit = brw_update_texture_surfaces,
};
+static void
+brw_update_cs_texture_surfaces(struct brw_context *brw)
+{
+ /* BRW_NEW_COMPUTE_PROGRAM */
+ struct gl_program *cs = (struct gl_program *) brw->compute_program;
+
+ /* _NEW_TEXTURE */
+ update_stage_texture_surfaces(brw, cs, &brw->cs.base, false);
+
+ /* emit alternate set of surface state for gather. this
+ * allows the surface format to be overridden for only the
+ * gather4 messages.
+ */
+ if (brw->gen < 8) {
+ if (cs && cs->UsesGather)
+ update_stage_texture_surfaces(brw, cs, &brw->cs.base, true);
+ }
+
+ brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
+}
+
+const struct brw_tracked_state brw_cs_texture_surfaces = {
+ .dirty = {
+ .mesa = _NEW_TEXTURE,
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_COMPUTE_PROGRAM,
+ },
+ .emit = brw_update_cs_texture_surfaces,
+};
+
+
void
brw_upload_ubo_surfaces(struct brw_context *brw,
struct gl_shader *shader,
@@ -1340,6 +1364,11 @@ brw_upload_image_surfaces(struct brw_context *brw,
}
brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
+ /* This may have changed the image metadata that depends on the context
+ * image unit state and is passed to the program as uniforms, so make
+ * sure that push and pull constants are reuploaded.
+ */
+ brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
}
}
diff --git a/src/mesa/drivers/dri/i965/gen6_constant_state.c b/src/mesa/drivers/dri/i965/gen6_constant_state.c
new file mode 100644
index 00000000000..6c0c32b26f7
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_constant_state.c
@@ -0,0 +1,190 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "intel_batchbuffer.h"
+#include "program/prog_parameter.h"
+
+void
+gen7_upload_constant_state(struct brw_context *brw,
+ const struct brw_stage_state *stage_state,
+ bool active, unsigned opcode)
+{
+ uint32_t mocs = brw->gen < 8 ? GEN7_MOCS_L3 : 0;
+
+ /* Disable if the shader stage is inactive or there are no push constants. */
+ active = active && stage_state->push_const_size != 0;
+
+ int dwords = brw->gen >= 8 ? 11 : 7;
+ BEGIN_BATCH(dwords);
+ OUT_BATCH(opcode << 16 | (dwords - 2));
+
+ /* Workaround for SKL+ (we use option #2 until we have a need for more
+ * constant buffers). This comes from the documentation for 3DSTATE_CONSTANT_*
+ *
+ * The driver must ensure The following case does not occur without a flush
+ * to the 3D engine: 3DSTATE_CONSTANT_* with buffer 3 read length equal to
+ * zero committed followed by a 3DSTATE_CONSTANT_* with buffer 0 read length
+ * not equal to zero committed. Possible ways to avoid this condition
+ * include:
+ * 1. always force buffer 3 to have a non zero read length
+ * 2. always force buffer 0 to a zero read length
+ */
+ if (brw->gen >= 9 && active) {
+ OUT_BATCH(0);
+ OUT_BATCH(stage_state->push_const_size);
+ } else {
+ OUT_BATCH(active ? stage_state->push_const_size : 0);
+ OUT_BATCH(0);
+ }
+ /* Pointer to the constant buffer. Covered by the set of state flags
+ * from gen6_prepare_wm_constants
+ */
+ if (brw->gen >= 9 && active) {
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ /* XXX: When using buffers other than 0, you need to specify the
+ * graphics virtual address regardless of INSPM/debug bits
+ */
+ OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_RENDER, 0,
+ stage_state->push_const_offset);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ } else if (brw->gen >= 8) {
+ OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ } else {
+ OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ }
+
+ ADVANCE_BATCH();
+
+ /* On SKL+ the new constants don't take effect until the next corresponding
+ * 3DSTATE_BINDING_TABLE_POINTER_* command is parsed so we need to ensure
+ * that is sent
+ */
+ if (brw->gen >= 9)
+ brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
+}
+
+/**
+ * Creates a streamed BO containing the push constants for the VS or GS on
+ * gen6+.
+ *
+ * Push constants are constant values (such as GLSL uniforms) that are
+ * pre-loaded into a shader stage's register space at thread spawn time.
+ *
+ * Not all GLSL uniforms will be uploaded as push constants: The hardware has
+ * a limitation of 32 or 64 EU registers (256 or 512 floats) per stage to be
+ * uploaded as push constants, while GL 4.4 requires at least 1024 components
+ * to be usable for the VS. Plus, currently we always use pull constants
+ * instead of push constants when doing variable-index array access.
+ *
+ * See brw_curbe.c for the equivalent gen4/5 code.
+ */
+void
+gen6_upload_push_constants(struct brw_context *brw,
+ const struct gl_program *prog,
+ const struct brw_stage_prog_data *prog_data,
+ struct brw_stage_state *stage_state,
+ enum aub_state_struct_type type)
+{
+ struct gl_context *ctx = &brw->ctx;
+
+ if (prog_data->nr_params == 0) {
+ stage_state->push_const_size = 0;
+ } else {
+ /* Updates the ParameterValues[i] pointers for all parameters of the
+ * basic type of PROGRAM_STATE_VAR.
+ */
+ /* XXX: Should this happen somewhere before to get our state flag set? */
+ if (prog)
+ _mesa_load_state_parameters(ctx, prog->Parameters);
+
+ gl_constant_value *param;
+ int i;
+
+ param = brw_state_batch(brw, type,
+ prog_data->nr_params * sizeof(gl_constant_value),
+ 32, &stage_state->push_const_offset);
+
+ STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
+
+ /* _NEW_PROGRAM_CONSTANTS
+ *
+ * Also _NEW_TRANSFORM -- we may reference clip planes other than as a
+ * side effect of dereferencing uniforms, so _NEW_PROGRAM_CONSTANTS
+ * wouldn't be set for them.
+ */
+ for (i = 0; i < prog_data->nr_params; i++) {
+ param[i] = *prog_data->param[i];
+ }
+
+ if (0) {
+ fprintf(stderr, "%s constants:\n",
+ _mesa_shader_stage_to_string(stage_state->stage));
+ for (i = 0; i < prog_data->nr_params; i++) {
+ if ((i & 7) == 0)
+ fprintf(stderr, "g%d: ",
+ prog_data->dispatch_grf_start_reg + i / 8);
+ fprintf(stderr, "%8f ", param[i].f);
+ if ((i & 7) == 7)
+ fprintf(stderr, "\n");
+ }
+ if ((i & 7) != 0)
+ fprintf(stderr, "\n");
+ fprintf(stderr, "\n");
+ }
+
+ stage_state->push_const_size = ALIGN(prog_data->nr_params, 8) / 8;
+ /* We can only push 32 registers of constants at a time. */
+
+ /* From the SNB PRM (vol2, part 1, section 3.2.1.4: 3DSTATE_CONSTANT_VS):
+ *
+ * "The sum of all four read length fields (each incremented to
+ * represent the actual read length) must be less than or equal to
+ * 32"
+ *
+ * From the IVB PRM (vol2, part 1, section 3.2.1.3: 3DSTATE_CONSTANT_VS):
+ *
+ * "The sum of all four read length fields must be less than or
+ * equal to the size of 64"
+ *
+ * The other shader stages all match the VS's limits.
+ */
+ assert(stage_state->push_const_size <= 32);
+ }
+}
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index 4bc0a8598d6..c18610ca38c 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -33,95 +33,6 @@
#include "program/prog_statevars.h"
#include "intel_batchbuffer.h"
-/**
- * Creates a streamed BO containing the push constants for the VS or GS on
- * gen6+.
- *
- * Push constants are constant values (such as GLSL uniforms) that are
- * pre-loaded into a shader stage's register space at thread spawn time.
- *
- * Not all GLSL uniforms will be uploaded as push constants: The hardware has
- * a limitation of 32 or 64 EU registers (256 or 512 floats) per stage to be
- * uploaded as push constants, while GL 4.4 requires at least 1024 components
- * to be usable for the VS. Plus, currently we always use pull constants
- * instead of push constants when doing variable-index array access.
- *
- * See brw_curbe.c for the equivalent gen4/5 code.
- */
-void
-gen6_upload_push_constants(struct brw_context *brw,
- const struct gl_program *prog,
- const struct brw_stage_prog_data *prog_data,
- struct brw_stage_state *stage_state,
- enum aub_state_struct_type type)
-{
- struct gl_context *ctx = &brw->ctx;
-
- if (prog_data->nr_params == 0) {
- stage_state->push_const_size = 0;
- } else {
- /* Updates the ParamaterValues[i] pointers for all parameters of the
- * basic type of PROGRAM_STATE_VAR.
- */
- /* XXX: Should this happen somewhere before to get our state flag set? */
- if (prog)
- _mesa_load_state_parameters(ctx, prog->Parameters);
-
- gl_constant_value *param;
- unsigned i;
-
- param = brw_state_batch(brw, type,
- prog_data->nr_params * sizeof(gl_constant_value),
- 32, &stage_state->push_const_offset);
-
- STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
-
- /* _NEW_PROGRAM_CONSTANTS
- *
- * Also _NEW_TRANSFORM -- we may reference clip planes other than as a
- * side effect of dereferencing uniforms, so _NEW_PROGRAM_CONSTANTS
- * wouldn't be set for them.
- */
- for (i = 0; i < prog_data->nr_params; i++) {
- param[i] = *prog_data->param[i];
- }
-
- if (0) {
- fprintf(stderr, "%s constants:\n",
- _mesa_shader_stage_to_string(stage_state->stage));
- for (i = 0; i < prog_data->nr_params; i++) {
- if ((i & 7) == 0)
- fprintf(stderr, "g%d: ",
- prog_data->dispatch_grf_start_reg + i / 8);
- fprintf(stderr, "%8f ", param[i].f);
- if ((i & 7) == 7)
- fprintf(stderr, "\n");
- }
- if ((i & 7) != 0)
- fprintf(stderr, "\n");
- fprintf(stderr, "\n");
- }
-
- stage_state->push_const_size = ALIGN(prog_data->nr_params, 8) / 8;
- /* We can only push 32 registers of constants at a time. */
-
- /* From the SNB PRM (vol2, part 1, section 3.2.1.4: 3DSTATE_CONSTANT_VS:
- *
- * "The sum of all four read length fields (each incremented to
- * represent the actual read length) must be less than or equal to
- * 32"
- *
- * From the IVB PRM (vol2, part 1, section 3.2.1.3: 3DSTATE_CONSTANT_VS:
- *
- * "The sum of all four read length fields must be less than or
- * equal to the size of 64"
- *
- * The other shader stages all match the VS's limits.
- */
- assert(stage_state->push_const_size <= 32);
- }
-}
-
static void
gen6_upload_vs_push_constants(struct brw_context *brw)
{
diff --git a/src/mesa/drivers/dri/i965/gen7_l3_state.c b/src/mesa/drivers/dri/i965/gen7_l3_state.c
index 0c1813f9048..c4babc24f43 100644
--- a/src/mesa/drivers/dri/i965/gen7_l3_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_l3_state.c
@@ -298,7 +298,12 @@ static struct brw_l3_weights
get_pipeline_state_l3_weights(const struct brw_context *brw)
{
const struct brw_stage_state *stage_states[] = {
- &brw->vs.base, &brw->gs.base, &brw->wm.base, &brw->cs.base
+ [MESA_SHADER_VERTEX] = &brw->vs.base,
+ [MESA_SHADER_TESS_CTRL] = &brw->tcs.base,
+ [MESA_SHADER_TESS_EVAL] = &brw->tes.base,
+ [MESA_SHADER_GEOMETRY] = &brw->gs.base,
+ [MESA_SHADER_FRAGMENT] = &brw->wm.base,
+ [MESA_SHADER_COMPUTE] = &brw->cs.base
};
bool needs_dc = false, needs_slm = false;
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index a18dc697651..0daecdc9ed2 100644
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -29,81 +29,6 @@
#include "program/prog_statevars.h"
#include "intel_batchbuffer.h"
-
-void
-gen7_upload_constant_state(struct brw_context *brw,
- const struct brw_stage_state *stage_state,
- bool active, unsigned opcode)
-{
- uint32_t mocs = brw->gen < 8 ? GEN7_MOCS_L3 : 0;
-
- /* Disable if the shader stage is inactive or there are no push constants. */
- active = active && stage_state->push_const_size != 0;
-
- int dwords = brw->gen >= 8 ? 11 : 7;
- BEGIN_BATCH(dwords);
- OUT_BATCH(opcode << 16 | (dwords - 2));
-
- /* Workaround for SKL+ (we use option #2 until we have a need for more
- * constant buffers). This comes from the documentation for 3DSTATE_CONSTANT_*
- *
- * The driver must ensure The following case does not occur without a flush
- * to the 3D engine: 3DSTATE_CONSTANT_* with buffer 3 read length equal to
- * zero committed followed by a 3DSTATE_CONSTANT_* with buffer 0 read length
- * not equal to zero committed. Possible ways to avoid this condition
- * include:
- * 1. always force buffer 3 to have a non zero read length
- * 2. always force buffer 0 to a zero read length
- */
- if (brw->gen >= 9 && active) {
- OUT_BATCH(0);
- OUT_BATCH(stage_state->push_const_size);
- } else {
- OUT_BATCH(active ? stage_state->push_const_size : 0);
- OUT_BATCH(0);
- }
-
- /* Pointer to the constant buffer. Covered by the set of state flags
- * from gen6_prepare_wm_contants
- */
- if (brw->gen >= 9 && active) {
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- /* XXX: When using buffers other than 0, you need to specify the
- * graphics virtual address regardless of INSPM/debug bits
- */
- OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_RENDER, 0,
- stage_state->push_const_offset);
- OUT_BATCH(0);
- OUT_BATCH(0);
- } else if (brw->gen>= 8) {
- OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- } else {
- OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- }
-
- ADVANCE_BATCH();
-
- /* On SKL+ the new constants don't take effect until the next corresponding
- * 3DSTATE_BINDING_TABLE_POINTER_* command is parsed so we need to ensure
- * that is sent
- */
- if (brw->gen >= 9)
- brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
-}
-
static void
upload_vs_state(struct brw_context *brw)
{
diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c
index fc8f701a24c..0a52815b8a6 100644
--- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
@@ -197,6 +197,28 @@ gen8_emit_fast_clear_color(struct brw_context *brw,
surf[7] |= mt->fast_clear_color_value;
}
+static uint32_t
+gen8_get_aux_mode(const struct brw_context *brw,
+ const struct intel_mipmap_tree *mt,
+ uint32_t surf_type)
+{
+ if (mt->mcs_mt == NULL)
+ return GEN8_SURFACE_AUX_MODE_NONE;
+
+ /*
+ * From the BDW PRM, Volume 2d, page 260 (RENDER_SURFACE_STATE):
+ * "When MCS is enabled for non-MSRT, HALIGN_16 must be used"
+ *
+ * From the hardware spec for GEN9:
+ * "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN
+ * 16 must be used."
+ */
+ if (brw->gen >= 9 || mt->num_samples == 1)
+ assert(mt->halign == 16);
+
+ return GEN8_SURFACE_AUX_MODE_MCS;
+}
+
static void
gen8_emit_texture_surface_state(struct brw_context *brw,
struct intel_mipmap_tree *mt,
@@ -209,13 +231,13 @@ gen8_emit_texture_surface_state(struct brw_context *brw,
bool rw, bool for_gather)
{
const unsigned depth = max_layer - min_layer;
- struct intel_mipmap_tree *aux_mt = NULL;
- uint32_t aux_mode = GEN8_SURFACE_AUX_MODE_NONE;
+ struct intel_mipmap_tree *aux_mt = mt->mcs_mt;
uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
unsigned tiling_mode, pitch;
const unsigned tr_mode = surface_tiling_resource_mode(mt->tr_mode);
const uint32_t surf_type = translate_tex_target(target);
+ uint32_t aux_mode = gen8_get_aux_mode(brw, mt, surf_type);
if (mt->format == MESA_FORMAT_S_UINT8) {
tiling_mode = GEN8_SURFACE_TILING_W;
@@ -229,20 +251,9 @@ gen8_emit_texture_surface_state(struct brw_context *brw,
* buffer should always have been resolved before it is used as a texture
* so there is no need for it.
*/
- if (mt->mcs_mt && mt->num_samples > 1) {
- aux_mt = mt->mcs_mt;
- aux_mode = GEN8_SURFACE_AUX_MODE_MCS;
-
- /*
- * From the BDW PRM, Volume 2d, page 260 (RENDER_SURFACE_STATE):
- * "When MCS is enabled for non-MSRT, HALIGN_16 must be used"
- *
- * From the hardware spec for GEN9:
- * "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN
- * 16 must be used."
- */
- if (brw->gen >= 9 || mt->num_samples == 1)
- assert(mt->halign == 16);
+ if (mt->num_samples <= 1) {
+ aux_mt = NULL;
+ aux_mode = GEN8_SURFACE_AUX_MODE_NONE;
}
uint32_t *surf = allocate_surface_state(brw, surf_offset, surf_index);
@@ -418,8 +429,6 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
struct gl_context *ctx = &brw->ctx;
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
struct intel_mipmap_tree *mt = irb->mt;
- struct intel_mipmap_tree *aux_mt = NULL;
- uint32_t aux_mode = GEN8_SURFACE_AUX_MODE_NONE;
unsigned width = mt->logical_width0;
unsigned height = mt->logical_height0;
unsigned pitch = mt->pitch;
@@ -472,21 +481,8 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
__func__, _mesa_get_format_name(rb_format));
}
- if (mt->mcs_mt) {
- aux_mt = mt->mcs_mt;
- aux_mode = GEN8_SURFACE_AUX_MODE_MCS;
-
- /*
- * From the BDW PRM, Volume 2d, page 260 (RENDER_SURFACE_STATE):
- * "When MCS is enabled for non-MSRT, HALIGN_16 must be used"
- *
- * From the hardware spec for GEN9:
- * "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN
- * 16 must be used."
- */
- if (brw->gen >= 9 || mt->num_samples == 1)
- assert(mt->halign == 16);
- }
+ struct intel_mipmap_tree *aux_mt = mt->mcs_mt;
+ const uint32_t aux_mode = gen8_get_aux_mode(brw, mt, surf_type);
uint32_t *surf = allocate_surface_state(brw, &offset, surf_index);
diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index 6d29fbdde21..72cf9af5b53 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -317,8 +317,8 @@ intel_miptree_blit(struct brw_context *brw,
*/
intel_miptree_slice_resolve_depth(brw, src_mt, src_level, src_slice);
intel_miptree_slice_resolve_depth(brw, dst_mt, dst_level, dst_slice);
- intel_miptree_resolve_color(brw, src_mt);
- intel_miptree_resolve_color(brw, dst_mt);
+ intel_miptree_resolve_color(brw, src_mt, 0);
+ intel_miptree_resolve_color(brw, dst_mt, 0);
if (src_flip)
src_y = minify(src_mt->physical_height0, src_level - src_mt->first_level) - src_y - height;
diff --git a/src/mesa/drivers/dri/i965/intel_copy_image.c b/src/mesa/drivers/dri/i965/intel_copy_image.c
index dbbac1c95a2..08b7623e63d 100644
--- a/src/mesa/drivers/dri/i965/intel_copy_image.c
+++ b/src/mesa/drivers/dri/i965/intel_copy_image.c
@@ -270,11 +270,11 @@ intel_copy_image_sub_data(struct gl_context *ctx,
*/
intel_miptree_all_slices_resolve_hiz(brw, src_mt);
intel_miptree_all_slices_resolve_depth(brw, src_mt);
- intel_miptree_resolve_color(brw, src_mt);
+ intel_miptree_resolve_color(brw, src_mt, 0);
intel_miptree_all_slices_resolve_hiz(brw, dst_mt);
intel_miptree_all_slices_resolve_depth(brw, dst_mt);
- intel_miptree_resolve_color(brw, dst_mt);
+ intel_miptree_resolve_color(brw, dst_mt, 0);
_mesa_get_format_block_size(src_mt->format, &bw, &bh);
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 108dd87dd8b..6c233d84df9 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -161,8 +161,9 @@ intel_get_non_msrt_mcs_alignment(struct intel_mipmap_tree *mt,
}
}
-static bool
-intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling)
+bool
+intel_tiling_supports_non_msrt_mcs(const struct brw_context *brw,
+ unsigned tiling)
{
/* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
* Target(s)", beneath the "Fast Color Clear" bullet (p326):
@@ -200,9 +201,9 @@ intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling)
* - MCS and Lossless compression is supported for TiledY/TileYs/TileYf
* non-MSRTs only.
*/
-static bool
+bool
intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw,
- struct intel_mipmap_tree *mt)
+ const struct intel_mipmap_tree *mt)
{
/* MCS support does not exist prior to Gen7 */
if (brw->gen < 7)
@@ -266,6 +267,32 @@ intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw,
return true;
}
+/* On Gen9 support for color buffer compression was extended to single
+ * sampled surfaces. This is a helper considering both auxiliary buffer
+ * type and number of samples telling if the given miptree represents
+ * the new single sampled case - also called lossless compression.
+ */
+bool
+intel_miptree_is_lossless_compressed(const struct brw_context *brw,
+ const struct intel_mipmap_tree *mt)
+{
+ /* Only available from Gen9 onwards. */
+ if (brw->gen < 9)
+ return false;
+
+ /* Compression always requires auxiliary buffer. */
+ if (!mt->mcs_mt)
+ return false;
+
+ /* Single sample compression is represented by re-using the msaa
+ * compression layout type: "Compressed Multisampled Surfaces".
+ */
+ if (mt->msaa_layout != INTEL_MSAA_LAYOUT_CMS)
+ return false;
+
+ /* And finally distinguish between msaa and single sample case. */
+ return mt->num_samples <= 1;
+}
/**
* Determine depth format corresponding to a depth+stencil format,
@@ -609,22 +636,21 @@ intel_get_yf_ys_bo_size(struct intel_mipmap_tree *mt, unsigned *alignment,
return size;
}
-struct intel_mipmap_tree *
-intel_miptree_create(struct brw_context *brw,
- GLenum target,
- mesa_format format,
- GLuint first_level,
- GLuint last_level,
- GLuint width0,
- GLuint height0,
- GLuint depth0,
- GLuint num_samples,
- uint32_t layout_flags)
+static struct intel_mipmap_tree *
+miptree_create(struct brw_context *brw,
+ GLenum target,
+ mesa_format format,
+ GLuint first_level,
+ GLuint last_level,
+ GLuint width0,
+ GLuint height0,
+ GLuint depth0,
+ GLuint num_samples,
+ uint32_t layout_flags)
{
struct intel_mipmap_tree *mt;
mesa_format tex_format = format;
mesa_format etc_format = MESA_FORMAT_NONE;
- GLuint total_width, total_height;
uint32_t alloc_flags = 0;
format = intel_lower_compressed_format(brw, format);
@@ -645,21 +671,8 @@ intel_miptree_create(struct brw_context *brw,
return NULL;
}
- total_width = mt->total_width;
- total_height = mt->total_height;
-
- if (format == MESA_FORMAT_S_UINT8) {
- /* Align to size of W tile, 64x64. */
- total_width = ALIGN(total_width, 64);
- total_height = ALIGN(total_height, 64);
- }
-
- bool y_or_x = false;
-
- if (mt->tiling == (I915_TILING_Y | I915_TILING_X)) {
- y_or_x = true;
+ if (mt->tiling == (I915_TILING_Y | I915_TILING_X))
mt->tiling = I915_TILING_Y;
- }
if (layout_flags & MIPTREE_LAYOUT_ACCELERATED_UPLOAD)
alloc_flags |= BO_ALLOC_FOR_RENDER;
@@ -675,26 +688,61 @@ intel_miptree_create(struct brw_context *brw,
mt->bo = drm_intel_bo_alloc_for_render(brw->bufmgr, "miptree",
size, alignment);
} else {
- mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree",
- total_width, total_height, mt->cpp,
- &mt->tiling, &pitch,
- alloc_flags);
+ if (format == MESA_FORMAT_S_UINT8) {
+ /* Align to size of W tile, 64x64. */
+ mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree",
+ ALIGN(mt->total_width, 64),
+ ALIGN(mt->total_height, 64),
+ mt->cpp, &mt->tiling, &pitch,
+ alloc_flags);
+ } else {
+ mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree",
+ mt->total_width, mt->total_height,
+ mt->cpp, &mt->tiling, &pitch,
+ alloc_flags);
+ }
}
mt->pitch = pitch;
+ return mt;
+}
+
+struct intel_mipmap_tree *
+intel_miptree_create(struct brw_context *brw,
+ GLenum target,
+ mesa_format format,
+ GLuint first_level,
+ GLuint last_level,
+ GLuint width0,
+ GLuint height0,
+ GLuint depth0,
+ GLuint num_samples,
+ uint32_t layout_flags)
+{
+ struct intel_mipmap_tree *mt = miptree_create(
+ brw, target, format,
+ first_level, last_level,
+ width0, height0, depth0, num_samples,
+ layout_flags);
+
/* If the BO is too large to fit in the aperture, we need to use the
* BLT engine to support it. Prior to Sandybridge, the BLT paths can't
* handle Y-tiling, so we need to fall back to X.
*/
- if (brw->gen < 6 && y_or_x && mt->bo->size >= brw->max_gtt_map_object_size) {
+ if (brw->gen < 6 && mt->bo->size >= brw->max_gtt_map_object_size &&
+ mt->tiling == I915_TILING_Y) {
+ unsigned long pitch = mt->pitch;
+ const uint32_t alloc_flags =
+ (layout_flags & MIPTREE_LAYOUT_ACCELERATED_UPLOAD) ?
+ BO_ALLOC_FOR_RENDER : 0;
perf_debug("%dx%d miptree larger than aperture; falling back to X-tiled\n",
mt->total_width, mt->total_height);
mt->tiling = I915_TILING_X;
drm_intel_bo_unreference(mt->bo);
mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree",
- total_width, total_height, mt->cpp,
+ mt->total_width, mt->total_height, mt->cpp,
&mt->tiling, &pitch, alloc_flags);
mt->pitch = pitch;
}
@@ -1402,6 +1450,27 @@ intel_miptree_copy_teximage(struct brw_context *brw,
intel_obj->needs_validate = true;
}
+/**
+ * Fill the entire MCS miptree attached to \p mt with the byte \p init_value
+ * and put \p mt into the INTEL_FAST_CLEAR_STATE_CLEAR state.
+ *
+ * From the Ivy Bridge PRM, Vol 2 Part 1 p326:
+ *
+ *    When MCS buffer is enabled and bound to MSRT, it is required that it
+ *    is cleared prior to any rendering.
+ *
+ * Since we don't use the MCS buffer for any purpose other than rendering,
+ * it makes sense to just clear it immediately upon allocation.
+ *
+ * Note: the clear value for MCS buffers is all 1's (a 0xff fill); the byte
+ * actually written is whatever the caller passes in \p init_value.
+ *
+ * \param brw         context used to map the MCS miptree
+ * \param mt          miptree whose mcs_mt is initialized
+ * \param init_value  fill byte handed to memset()
+ */
+static void
+intel_miptree_init_mcs(struct brw_context *brw,
+                       struct intel_mipmap_tree *mt,
+                       int init_value)
+{
+   struct intel_mipmap_tree *mcs = mt->mcs_mt;
+   void *map = intel_miptree_map_raw(brw, mcs);
+
+   /* The fill covers total_height * pitch bytes of the mapping. */
+   memset(map, init_value, mcs->total_height * mcs->pitch);
+   intel_miptree_unmap_raw(mcs);
+
+   mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR;
+}
+
static bool
intel_miptree_alloc_mcs(struct brw_context *brw,
struct intel_mipmap_tree *mt,
@@ -1447,31 +1516,18 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
*/
const uint32_t mcs_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD |
MIPTREE_LAYOUT_TILING_Y;
- mt->mcs_mt = intel_miptree_create(brw,
- mt->target,
- format,
- mt->first_level,
- mt->last_level,
- mt->logical_width0,
- mt->logical_height0,
- mt->logical_depth0,
- 0 /* num_samples */,
- mcs_flags);
-
- /* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
- *
- * When MCS buffer is enabled and bound to MSRT, it is required that it
- * is cleared prior to any rendering.
- *
- * Since we don't use the MCS buffer for any purpose other than rendering,
- * it makes sense to just clear it immediately upon allocation.
- *
- * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff.
- */
- void *data = intel_miptree_map_raw(brw, mt->mcs_mt);
- memset(data, 0xff, mt->mcs_mt->total_height * mt->mcs_mt->pitch);
- intel_miptree_unmap_raw(mt->mcs_mt);
- mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR;
+ mt->mcs_mt = miptree_create(brw,
+ mt->target,
+ format,
+ mt->first_level,
+ mt->last_level,
+ mt->logical_width0,
+ mt->logical_height0,
+ mt->logical_depth0,
+ 0 /* num_samples */,
+ mcs_flags);
+
+ intel_miptree_init_mcs(brw, mt, 0xFF);
return mt->mcs_mt;
}
@@ -1520,16 +1576,16 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
if (brw->gen >= 8) {
layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
}
- mt->mcs_mt = intel_miptree_create(brw,
- mt->target,
- format,
- mt->first_level,
- mt->last_level,
- mcs_width,
- mcs_height,
- mt->logical_depth0,
- 0 /* num_samples */,
- layout_flags);
+ mt->mcs_mt = miptree_create(brw,
+ mt->target,
+ format,
+ mt->first_level,
+ mt->last_level,
+ mcs_width,
+ mcs_height,
+ mt->logical_depth0,
+ 0 /* num_samples */,
+ layout_flags);
return mt->mcs_mt;
}
@@ -1991,8 +2047,17 @@ intel_miptree_all_slices_resolve_depth(struct brw_context *brw,
void
intel_miptree_resolve_color(struct brw_context *brw,
- struct intel_mipmap_tree *mt)
+ struct intel_mipmap_tree *mt,
+ int flags)
{
+ /* From gen9 onwards there is a new compression scheme for single-sampled
+ * surfaces called "lossless compressed". These don't always need to be
+ * resolved.
+ */
+ if ((flags & INTEL_MIPTREE_IGNORE_CCS_E) &&
+ intel_miptree_is_lossless_compressed(brw, mt))
+ return;
+
switch (mt->fast_clear_state) {
case INTEL_FAST_CLEAR_STATE_NO_MCS:
case INTEL_FAST_CLEAR_STATE_RESOLVED:
@@ -2001,8 +2066,10 @@ intel_miptree_resolve_color(struct brw_context *brw,
case INTEL_FAST_CLEAR_STATE_UNRESOLVED:
case INTEL_FAST_CLEAR_STATE_CLEAR:
/* Fast color clear resolves only make sense for non-MSAA buffers. */
- if (mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE)
+ if (mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE ||
+ intel_miptree_is_lossless_compressed(brw, mt)) {
brw_meta_resolve_color(brw, mt);
+ }
break;
}
}
@@ -2029,7 +2096,7 @@ intel_miptree_make_shareable(struct brw_context *brw,
assert(mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE);
if (mt->mcs_mt) {
- intel_miptree_resolve_color(brw, mt);
+ intel_miptree_resolve_color(brw, mt, 0);
intel_miptree_release(&mt->mcs_mt);
mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
}
@@ -2137,7 +2204,7 @@ intel_miptree_map_raw(struct brw_context *brw, struct intel_mipmap_tree *mt)
/* CPU accesses to color buffers don't understand fast color clears, so
* resolve any pending fast color clears before we map.
*/
- intel_miptree_resolve_color(brw, mt);
+ intel_miptree_resolve_color(brw, mt, 0);
drm_intel_bo *bo = mt->bo;
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index 64f73ea9ae5..7cdfb37ff41 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -667,6 +667,18 @@ intel_get_non_msrt_mcs_alignment(struct intel_mipmap_tree *mt,
unsigned *width_px, unsigned *height);
bool
+intel_miptree_is_lossless_compressed(const struct brw_context *brw,
+ const struct intel_mipmap_tree *mt);
+
+bool
+intel_tiling_supports_non_msrt_mcs(const struct brw_context *brw,
+ unsigned tiling);
+
+bool
+intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw,
+ const struct intel_mipmap_tree *mt);
+
+bool
intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
struct intel_mipmap_tree *mt);
@@ -884,9 +896,19 @@ intel_miptree_used_for_rendering(struct intel_mipmap_tree *mt)
mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_UNRESOLVED;
}
+/**
+ * Flag values telling color resolve pass which special types of buffers
+ * can be ignored.
+ *
+ * INTEL_MIPTREE_IGNORE_CCS_E: Lossless compressed (single-sample
+ * compression scheme since gen9)
+ */
+#define INTEL_MIPTREE_IGNORE_CCS_E (1 << 0)
+
void
intel_miptree_resolve_color(struct brw_context *brw,
- struct intel_mipmap_tree *mt);
+ struct intel_mipmap_tree *mt,
+ int flags);
void
intel_miptree_make_shareable(struct brw_context *brw,
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c b/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c
index 699e48a297c..54a741395eb 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c
@@ -257,7 +257,7 @@ do_blit_bitmap( struct gl_context *ctx,
/* The blitter has no idea about fast color clears, so we need to resolve
* the miptree before we do anything.
*/
- intel_miptree_resolve_color(brw, irb->mt);
+ intel_miptree_resolve_color(brw, irb->mt, 0);
/* Chop it all into chunks that can be digested by hardware: */
for (py = 0; py < height; py += DY) {
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c
index 10d14623fe1..31030b1b4ea 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
@@ -155,7 +155,7 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx,
/* Since we are going to read raw data to the miptree, we need to resolve
* any pending fast color clears before we start.
*/
- intel_miptree_resolve_color(brw, irb->mt);
+ intel_miptree_resolve_color(brw, irb->mt, 0);
bo = irb->mt->bo;
diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c
index 5d32a4ce650..e21c3ac543f 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -423,7 +423,7 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx,
/* Since we are going to write raw data to the miptree, we need to resolve
* any pending fast color clears before we start.
*/
- intel_miptree_resolve_color(brw, image->mt);
+ intel_miptree_resolve_color(brw, image->mt, 0);
bo = image->mt->bo;
diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
index 970ded1e66b..573f701acdd 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
@@ -140,7 +140,7 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx,
/* Since we are going to write raw data to the miptree, we need to resolve
* any pending fast color clears before we start.
*/
- intel_miptree_resolve_color(brw, image->mt);
+ intel_miptree_resolve_color(brw, image->mt, 0);
bo = image->mt->bo;
diff --git a/src/mesa/drivers/dri/i965/intel_tex_validate.c b/src/mesa/drivers/dri/i965/intel_tex_validate.c
index 2cf9c13be33..3a811e10359 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_validate.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_validate.c
@@ -24,6 +24,7 @@
#include "main/mtypes.h"
#include "main/macros.h"
#include "main/samplerobj.h"
+#include "main/teximage.h"
#include "main/texobj.h"
#include "brw_context.h"
diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c
index 08f13178f84..61f703667aa 100644
--- a/src/mesa/main/attrib.c
+++ b/src/mesa/main/attrib.c
@@ -766,7 +766,7 @@ pop_texture_group(struct gl_context *ctx, struct texture_state *texstate)
_mesa_set_enable(ctx, GL_TEXTURE_2D, !!(unit->Enabled & TEXTURE_2D_BIT));
_mesa_set_enable(ctx, GL_TEXTURE_3D, !!(unit->Enabled & TEXTURE_3D_BIT));
if (ctx->Extensions.ARB_texture_cube_map) {
- _mesa_set_enable(ctx, GL_TEXTURE_CUBE_MAP_ARB,
+ _mesa_set_enable(ctx, GL_TEXTURE_CUBE_MAP,
!!(unit->Enabled & TEXTURE_CUBE_BIT));
}
if (ctx->Extensions.NV_texture_rectangle) {
@@ -837,7 +837,7 @@ pop_texture_group(struct gl_context *ctx, struct texture_state *texstate)
/* don't restore state for unsupported targets to prevent
* raising GL errors.
*/
- if (obj->Target == GL_TEXTURE_CUBE_MAP_ARB &&
+ if (obj->Target == GL_TEXTURE_CUBE_MAP &&
!ctx->Extensions.ARB_texture_cube_map) {
continue;
}
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index de1aba44c1b..9aec42508a7 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -3007,8 +3007,8 @@ set_atomic_buffer_binding(struct gl_context *ctx,
_mesa_reference_buffer_object(ctx, &binding->BufferObject, bufObj);
if (bufObj == ctx->Shared->NullBufferObj) {
- binding->Offset = -1;
- binding->Size = -1;
+ binding->Offset = 0;
+ binding->Size = 0;
} else {
binding->Offset = offset;
binding->Size = size;
diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c
index 83e238ae825..26dafd1b786 100644
--- a/src/mesa/main/buffers.c
+++ b/src/mesa/main/buffers.c
@@ -58,10 +58,7 @@ supported_buffer_bitmask(const struct gl_context *ctx,
if (_mesa_is_user_fbo(fb)) {
/* A user-created renderbuffer */
- GLuint i;
- for (i = 0; i < ctx->Const.MaxColorAttachments; i++) {
- mask |= (BUFFER_BIT_COLOR0 << i);
- }
+ mask = ((1 << ctx->Const.MaxColorAttachments) - 1) << BUFFER_COLOR0;
}
else {
/* A window system framebuffer */
@@ -159,6 +156,9 @@ draw_buffer_enum_to_bitmask(const struct gl_context *ctx, GLenum buffer)
case GL_COLOR_ATTACHMENT7_EXT:
return BUFFER_BIT_COLOR7;
default:
+ /* not an error, but also not supported */
+ if (buffer >= GL_COLOR_ATTACHMENT8 && buffer <= GL_COLOR_ATTACHMENT31)
+ return 1 << BUFFER_COUNT;
/* error */
return BAD_MASK;
}
@@ -171,7 +171,7 @@ draw_buffer_enum_to_bitmask(const struct gl_context *ctx, GLenum buffer)
* renderbuffer (a BUFFER_* value).
* return -1 for an invalid buffer.
*/
-static GLint
+static gl_buffer_index
read_buffer_enum_to_index(GLenum buffer)
{
switch (buffer) {
@@ -214,6 +214,9 @@ read_buffer_enum_to_index(GLenum buffer)
case GL_COLOR_ATTACHMENT7_EXT:
return BUFFER_COLOR7;
default:
+ /* not an error, but also not supported */
+ if (buffer >= GL_COLOR_ATTACHMENT8 && buffer <= GL_COLOR_ATTACHMENT31)
+ return BUFFER_COUNT;
/* error */
return -1;
}
@@ -221,7 +224,7 @@ read_buffer_enum_to_index(GLenum buffer)
/**
- * Called by glDrawBuffer().
+ * Called by glDrawBuffer() and glNamedFramebufferDrawBuffer().
* Specify which renderbuffer(s) to draw into for the first color output.
* <buffer> can name zero, one, two or four renderbuffers!
* \sa _mesa_DrawBuffers
@@ -242,9 +245,9 @@ read_buffer_enum_to_index(GLenum buffer)
*
* See the GL_EXT_framebuffer_object spec for more info.
*/
-void
-_mesa_draw_buffer(struct gl_context *ctx, struct gl_framebuffer *fb,
- GLenum buffer, const char *caller)
+static void
+draw_buffer(struct gl_context *ctx, struct gl_framebuffer *fb,
+ GLenum buffer, const char *caller)
{
GLbitfield destMask;
@@ -293,7 +296,7 @@ void GLAPIENTRY
_mesa_DrawBuffer(GLenum buffer)
{
GET_CURRENT_CONTEXT(ctx);
- _mesa_draw_buffer(ctx, ctx->DrawBuffer, buffer, "glDrawBuffer");
+ draw_buffer(ctx, ctx->DrawBuffer, buffer, "glDrawBuffer");
}
@@ -312,22 +315,22 @@ _mesa_NamedFramebufferDrawBuffer(GLuint framebuffer, GLenum buf)
else
fb = ctx->WinSysDrawBuffer;
- _mesa_draw_buffer(ctx, fb, buf, "glNamedFramebufferDrawBuffer");
+ draw_buffer(ctx, fb, buf, "glNamedFramebufferDrawBuffer");
}
/**
- * Called by glDrawBuffersARB; specifies the destination color renderbuffers
- * for N fragment program color outputs.
+ * Called by glDrawBuffersARB() and glNamedFramebufferDrawBuffers() to specify
+ * the destination color renderbuffers for N fragment program color outputs.
* \sa _mesa_DrawBuffer
* \param n number of outputs
* \param buffers array [n] of renderbuffer names. Unlike glDrawBuffer, the
* names cannot specify more than one buffer. For example,
* GL_FRONT_AND_BACK is illegal.
*/
-void
-_mesa_draw_buffers(struct gl_context *ctx, struct gl_framebuffer *fb,
- GLsizei n, const GLenum *buffers, const char *caller)
+static void
+draw_buffers(struct gl_context *ctx, struct gl_framebuffer *fb,
+ GLsizei n, const GLenum *buffers, const char *caller)
{
GLuint output;
GLbitfield usedBufferMask, supportedMask;
@@ -502,7 +505,7 @@ void GLAPIENTRY
_mesa_DrawBuffers(GLsizei n, const GLenum *buffers)
{
GET_CURRENT_CONTEXT(ctx);
- _mesa_draw_buffers(ctx, ctx->DrawBuffer, n, buffers, "glDrawBuffers");
+ draw_buffers(ctx, ctx->DrawBuffer, n, buffers, "glDrawBuffers");
}
@@ -522,7 +525,7 @@ _mesa_NamedFramebufferDrawBuffers(GLuint framebuffer, GLsizei n,
else
fb = ctx->WinSysDrawBuffer;
- _mesa_draw_buffers(ctx, fb, n, bufs, "glNamedFramebufferDrawBuffers");
+ draw_buffers(ctx, fb, n, bufs, "glNamedFramebufferDrawBuffers");
}
@@ -545,8 +548,8 @@ updated_drawbuffers(struct gl_context *ctx, struct gl_framebuffer *fb)
/**
- * Helper function to set the GL_DRAW_BUFFER state in the context and
- * current FBO. Called via glDrawBuffer(), glDrawBuffersARB()
+ * Helper function to set the GL_DRAW_BUFFER state for the given context and
+ * FBO. Called via glDrawBuffer(), glDrawBuffersARB()
*
* All error checking will have been done prior to calling this function
* so nothing should go wrong at this point.
@@ -662,14 +665,17 @@ _mesa_update_draw_buffers(struct gl_context *ctx)
/**
* Like \sa _mesa_drawbuffers(), this is a helper function for setting
- * GL_READ_BUFFER state in the context and current FBO.
+ * GL_READ_BUFFER state for the given context and FBO.
+ * Note that all error checking should have been done before calling
+ * this function.
* \param ctx the rendering context
+ * \param fb the framebuffer object to update
* \param buffer GL_FRONT, GL_BACK, GL_COLOR_ATTACHMENT0, etc.
* \param bufferIndex the numerical index corresponding to 'buffer'
*/
void
_mesa_readbuffer(struct gl_context *ctx, struct gl_framebuffer *fb,
- GLenum buffer, GLint bufferIndex)
+ GLenum buffer, gl_buffer_index bufferIndex)
{
if ((fb == ctx->ReadBuffer) && _mesa_is_winsys_fbo(fb)) {
/* Only update the per-context READ_BUFFER state if we're bound to
@@ -687,15 +693,16 @@ _mesa_readbuffer(struct gl_context *ctx, struct gl_framebuffer *fb,
/**
- * Called by glReadBuffer to set the source renderbuffer for reading pixels.
+ * Called by glReadBuffer and glNamedFramebufferReadBuffer to set the source
+ * renderbuffer for reading pixels.
* \param mode color buffer such as GL_FRONT, GL_BACK, etc.
*/
-void
-_mesa_read_buffer(struct gl_context *ctx, struct gl_framebuffer *fb,
- GLenum buffer, const char *caller)
+static void
+read_buffer(struct gl_context *ctx, struct gl_framebuffer *fb,
+ GLenum buffer, const char *caller)
{
GLbitfield supportedMask;
- GLint srcBuffer;
+ gl_buffer_index srcBuffer;
FLUSH_VERTICES(ctx, 0);
@@ -740,7 +747,7 @@ void GLAPIENTRY
_mesa_ReadBuffer(GLenum buffer)
{
GET_CURRENT_CONTEXT(ctx);
- _mesa_read_buffer(ctx, ctx->ReadBuffer, buffer, "glReadBuffer");
+ read_buffer(ctx, ctx->ReadBuffer, buffer, "glReadBuffer");
}
@@ -759,5 +766,5 @@ _mesa_NamedFramebufferReadBuffer(GLuint framebuffer, GLenum src)
else
fb = ctx->WinSysReadBuffer;
- _mesa_read_buffer(ctx, fb, src, "glNamedFramebufferReadBuffer");
+ read_buffer(ctx, fb, src, "glNamedFramebufferReadBuffer");
}
diff --git a/src/mesa/main/buffers.h b/src/mesa/main/buffers.h
index 5aa79fda54b..9df08154688 100644
--- a/src/mesa/main/buffers.h
+++ b/src/mesa/main/buffers.h
@@ -34,13 +34,11 @@
#include "glheader.h"
+#include "mtypes.h"
struct gl_context;
struct gl_framebuffer;
-extern void
-_mesa_draw_buffer(struct gl_context *ctx, struct gl_framebuffer *fb,
- GLenum buffer, const char *caller);
extern void GLAPIENTRY
_mesa_DrawBuffer( GLenum mode );
@@ -48,10 +46,6 @@ _mesa_DrawBuffer( GLenum mode );
extern void GLAPIENTRY
_mesa_NamedFramebufferDrawBuffer(GLuint framebuffer, GLenum buf);
-extern void
-_mesa_draw_buffers(struct gl_context *ctx, struct gl_framebuffer *fb,
- GLsizei n, const GLenum *buffers, const char *caller);
-
extern void GLAPIENTRY
_mesa_DrawBuffers(GLsizei n, const GLenum *buffers);
@@ -66,16 +60,12 @@ _mesa_drawbuffers(struct gl_context *ctx, struct gl_framebuffer *fb,
extern void
_mesa_readbuffer(struct gl_context *ctx, struct gl_framebuffer *fb,
- GLenum buffer, GLint bufferIndex);
+ GLenum buffer, gl_buffer_index bufferIndex);
extern void
_mesa_update_draw_buffers(struct gl_context *ctx);
-extern void
-_mesa_read_buffer(struct gl_context *ctx, struct gl_framebuffer *fb,
- GLenum buffer, const char *caller);
-
extern void GLAPIENTRY
_mesa_ReadBuffer( GLenum mode );
diff --git a/src/mesa/main/clear.c b/src/mesa/main/clear.c
index 3bfcc5c0e39..92f69ab9b59 100644
--- a/src/mesa/main/clear.c
+++ b/src/mesa/main/clear.c
@@ -325,18 +325,6 @@ _mesa_ClearBufferiv(GLenum buffer, GLint drawbuffer, const GLint *value)
_mesa_update_state( ctx );
}
- /* Page 498 of the PDF, section '17.4.3.1 Clearing Individual Buffers'
- * of the OpenGL 4.5 spec states:
- *
- * "An INVALID_ENUM error is generated by ClearBufferiv and
- * ClearNamedFramebufferiv if buffer is not COLOR or STENCIL."
- */
- if (buffer == GL_DEPTH || buffer == GL_DEPTH_STENCIL) {
- _mesa_error(ctx, GL_INVALID_ENUM,
- "glClearBufferiv(buffer=GL_DEPTH || GL_DEPTH_STENCIL)");
- return;
- }
-
switch (buffer) {
case GL_STENCIL:
/* Page 264 (page 280 of the PDF) of the OpenGL 3.0 spec says:
@@ -386,26 +374,13 @@ _mesa_ClearBufferiv(GLenum buffer, GLint drawbuffer, const GLint *value)
}
}
break;
- case GL_DEPTH:
- /* Page 264 (page 280 of the PDF) of the OpenGL 3.0 spec says:
- *
- * "The result of ClearBuffer is undefined if no conversion between
- * the type of the specified value and the type of the buffer being
- * cleared is defined (for example, if ClearBufferiv is called for a
- * fixed- or floating-point buffer, or if ClearBufferfv is called
- * for a signed or unsigned integer buffer). This is not an error."
+ default:
+ /* Page 498 of the PDF, section '17.4.3.1 Clearing Individual Buffers'
+ * of the OpenGL 4.5 spec states:
*
- * In this case we take "undefined" and "not an error" to mean "ignore."
- * Note that we still need to generate an error for the invalid
- * drawbuffer case (see the GL_STENCIL case above).
+ * "An INVALID_ENUM error is generated by ClearBufferiv and
+ * ClearNamedFramebufferiv if buffer is not COLOR or STENCIL."
*/
- if (drawbuffer != 0) {
- _mesa_error(ctx, GL_INVALID_VALUE, "glClearBufferiv(drawbuffer=%d)",
- drawbuffer);
- return;
- }
- return;
- default:
_mesa_error(ctx, GL_INVALID_ENUM, "glClearBufferiv(buffer=%s)",
_mesa_enum_to_string(buffer));
return;
@@ -470,32 +445,13 @@ _mesa_ClearBufferuiv(GLenum buffer, GLint drawbuffer, const GLuint *value)
}
}
break;
- case GL_DEPTH:
- case GL_STENCIL:
- /* Page 264 (page 280 of the PDF) of the OpenGL 3.0 spec says:
- *
- * "The result of ClearBuffer is undefined if no conversion between
- * the type of the specified value and the type of the buffer being
- * cleared is defined (for example, if ClearBufferiv is called for a
- * fixed- or floating-point buffer, or if ClearBufferfv is called
- * for a signed or unsigned integer buffer). This is not an error."
- *
- * In this case we take "undefined" and "not an error" to mean "ignore."
- * Even though we could do something sensible for GL_STENCIL, page 263
- * (page 279 of the PDF) says:
- *
- * "Only ClearBufferiv should be used to clear stencil buffers."
+ default:
+ /* Page 498 of the PDF, section '17.4.3.1 Clearing Individual Buffers'
+ * of the OpenGL 4.5 spec states:
*
- * Note that we still need to generate an error for the invalid
- * drawbuffer case (see the GL_STENCIL case in _mesa_ClearBufferiv).
+ * "An INVALID_ENUM error is generated by ClearBufferuiv and
+ * ClearNamedFramebufferuiv if buffer is not COLOR."
*/
- if (drawbuffer != 0) {
- _mesa_error(ctx, GL_INVALID_VALUE, "glClearBufferuiv(drawbuffer=%d)",
- drawbuffer);
- return;
- }
- return;
- default:
_mesa_error(ctx, GL_INVALID_ENUM, "glClearBufferuiv(buffer=%s)",
_mesa_enum_to_string(buffer));
return;
@@ -587,26 +543,13 @@ _mesa_ClearBufferfv(GLenum buffer, GLint drawbuffer, const GLfloat *value)
}
}
break;
- case GL_STENCIL:
- /* Page 264 (page 280 of the PDF) of the OpenGL 3.0 spec says:
- *
- * "The result of ClearBuffer is undefined if no conversion between
- * the type of the specified value and the type of the buffer being
- * cleared is defined (for example, if ClearBufferiv is called for a
- * fixed- or floating-point buffer, or if ClearBufferfv is called
- * for a signed or unsigned integer buffer). This is not an error."
+ default:
+ /* Page 498 of the PDF, section '17.4.3.1 Clearing Individual Buffers'
+ * of the OpenGL 4.5 spec states:
*
- * In this case we take "undefined" and "not an error" to mean "ignore."
- * Note that we still need to generate an error for the invalid
- * drawbuffer case (see the GL_DEPTH case above).
+ * "An INVALID_ENUM error is generated by ClearBufferfv and
+ * ClearNamedFramebufferfv if buffer is not COLOR or DEPTH."
*/
- if (drawbuffer != 0) {
- _mesa_error(ctx, GL_INVALID_VALUE, "glClearBufferfv(drawbuffer=%d)",
- drawbuffer);
- return;
- }
- return;
- default:
_mesa_error(ctx, GL_INVALID_ENUM, "glClearBufferfv(buffer=%s)",
_mesa_enum_to_string(buffer));
return;
diff --git a/src/mesa/main/compute.c b/src/mesa/main/compute.c
index 53e7a500f61..b71430f2b12 100644
--- a/src/mesa/main/compute.c
+++ b/src/mesa/main/compute.c
@@ -41,6 +41,9 @@ _mesa_DispatchCompute(GLuint num_groups_x,
if (!_mesa_validate_DispatchCompute(ctx, num_groups))
return;
+ if (num_groups_x == 0u || num_groups_y == 0u || num_groups_z == 0u)
+ return;
+
ctx->Driver.DispatchCompute(ctx, num_groups);
}
diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h
index 2d53e2fa72f..820ae072da6 100644
--- a/src/mesa/main/config.h
+++ b/src/mesa/main/config.h
@@ -288,17 +288,6 @@
#define PERFQUERY_HAVE_GPA_EXTENDED_COUNTERS 0
/*@}*/
-/** For GL_ARB_compute_shader */
-/*@{*/
-#define MAX_COMPUTE_UNIFORM_BLOCKS 12
-#define MAX_COMPUTE_TEXTURE_IMAGE_UNITS 16
-#define MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS 8
-#define MAX_COMPUTE_ATOMIC_COUNTERS 8
-#define MAX_COMPUTE_SHARED_MEMORY_SIZE 32768
-#define MAX_COMPUTE_UNIFORM_COMPONENTS 512
-#define MAX_COMPUTE_IMAGE_UNIFORMS 8
-/*@}*/
-
/** For GL_ARB_pipeline_statistics_query */
#define MAX_PIPELINE_STATISTICS 11
diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
index 9388a1ca51d..26eee28db4e 100644
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -1600,9 +1600,6 @@ _mesa_check_init_viewport(struct gl_context *ctx, GLuint width, GLuint height)
static void
handle_first_current(struct gl_context *ctx)
{
- GLenum buffer;
- GLint bufferIndex;
-
if (ctx->Version == 0) {
/* probably in the process of tearing down the context */
return;
@@ -1617,6 +1614,8 @@ handle_first_current(struct gl_context *ctx)
* For GLES it is always GL_BACK which has a magic interpretation */
if (!ctx->HasConfig && _mesa_is_desktop_gl(ctx)) {
if (ctx->DrawBuffer != _mesa_get_incomplete_framebuffer()) {
+ GLenum buffer;
+
if (ctx->DrawBuffer->Visual.doubleBufferMode)
buffer = GL_BACK;
else
@@ -1627,6 +1626,9 @@ handle_first_current(struct gl_context *ctx)
}
if (ctx->ReadBuffer != _mesa_get_incomplete_framebuffer()) {
+ gl_buffer_index bufferIndex;
+ GLenum buffer;
+
if (ctx->ReadBuffer->Visual.doubleBufferMode) {
buffer = GL_BACK;
bufferIndex = BUFFER_BACK_LEFT;
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 19ef3042548..3f5aa5db051 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -35,6 +35,7 @@
#include "glheader.h"
+struct gl_bitmap_atlas;
struct gl_buffer_object;
struct gl_context;
struct gl_display_list;
@@ -154,6 +155,14 @@ struct dd_function_table {
GLint x, GLint y, GLsizei width, GLsizei height,
const struct gl_pixelstore_attrib *unpack,
const GLubyte *bitmap );
+
+ /**
+ * Called by display list code for optimized glCallLists/glBitmap rendering
+ * The driver must support texture rectangles of width 1024 or more.
+ */
+ void (*DrawAtlasBitmaps)(struct gl_context *ctx,
+ const struct gl_bitmap_atlas *atlas,
+ GLuint count, const GLubyte *ids);
/*@}*/
diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c
index 0e25efbae72..afd2d83cb59 100644
--- a/src/mesa/main/dlist.c
+++ b/src/mesa/main/dlist.c
@@ -72,6 +72,9 @@
#include "vbo/vbo.h"
+#define USE_BITMAP_ATLAS 1
+
+
/**
* Other parts of Mesa (such as the VBO module) can plug into the display
@@ -606,6 +609,261 @@ void mesa_print_display_list(GLuint list);
/**
+ * Test whether a display list consists of exactly one glBitmap command,
+ * i.e. an OPCODE_BITMAP instruction immediately followed by
+ * OPCODE_END_OF_LIST.
+ *
+ * \param dlist  the display list to inspect
+ * \return true if the list is a lone glBitmap call, false otherwise
+ */
+static bool
+is_bitmap_list(const struct gl_display_list *dlist)
+{
+   const Node *first = dlist->Head;
+   const Node *next;
+
+   if (first[0].opcode != OPCODE_BITMAP)
+      return false;
+
+   /* Step over the bitmap instruction to reach whatever follows it. */
+   next = first + InstSize[OPCODE_BITMAP];
+   return next[0].opcode == OPCODE_END_OF_LIST;
+}
+
+
+/**
+ * Test whether a display list is empty, i.e. its very first instruction
+ * is OPCODE_END_OF_LIST.
+ *
+ * \param dlist  the display list to inspect
+ * \return true if the list holds no commands
+ */
+static bool
+is_empty_list(const struct gl_display_list *dlist)
+{
+   return dlist->Head[0].opcode == OPCODE_END_OF_LIST;
+}
+
+
+/**
+ * Delete/free a gl_bitmap_atlas. Called during context tear-down.
+ *
+ * Releases the atlas texture object through the driver's DeleteTexture
+ * hook (if one was created) and frees the glyph array.
+ *
+ * NOTE(review): the gl_bitmap_atlas struct itself is not freed here, and
+ * the call site in _mesa_DeleteLists() only removes the hash entry
+ * afterwards -- confirm who owns and frees the struct that
+ * alloc_bitmap_atlas() allocates.
+ *
+ * \param ctx    the rendering context whose driver deletes the texture
+ * \param atlas  the atlas whose resources are released
+ */
+void
+_mesa_delete_bitmap_atlas(struct gl_context *ctx, struct gl_bitmap_atlas *atlas)
+{
+ if (atlas->texObj) {
+ ctx->Driver.DeleteTexture(ctx, atlas->texObj);
+ }
+ /* free(NULL) is a no-op, so no guard is needed for a missing array */
+ free(atlas->glyphs);
+}
+
+
+/**
+ * Find the bitmap atlas registered under the given display list base ID.
+ *
+ * \param ctx       the rendering context (the shared BitmapAtlas table
+ *                  is searched)
+ * \param listBase  hash key of the atlas; must be non-zero
+ * \return the value stored in the hash table for \p listBase
+ */
+static struct gl_bitmap_atlas *
+lookup_bitmap_atlas(struct gl_context *ctx, GLuint listBase)
+{
+   assert(listBase > 0);
+   return _mesa_HashLookup(ctx->Shared->BitmapAtlas, listBase);
+}
+
+
+/**
+ * Allocate a zero-initialized bitmap atlas and register it in the shared
+ * BitmapAtlas hash table under \p listBase.
+ *
+ * \param ctx       the rendering context
+ * \param listBase  hash key for the new atlas; must be non-zero and must
+ *                  not already have an atlas registered
+ * \return the new atlas, or NULL if the allocation failed (in which case
+ *         nothing is inserted into the table)
+ */
+static struct gl_bitmap_atlas *
+alloc_bitmap_atlas(struct gl_context *ctx, GLuint listBase)
+{
+   struct gl_bitmap_atlas *atlas;
+
+   assert(listBase > 0);
+   assert(_mesa_HashLookup(ctx->Shared->BitmapAtlas, listBase) == NULL);
+
+   atlas = calloc(1, sizeof(*atlas));
+   if (!atlas)
+      return NULL;
+
+   _mesa_HashInsert(ctx->Shared->BitmapAtlas, listBase, atlas);
+   return atlas;
+}
+
+
+/**
+ * Try to build a bitmap atlas.  This involves examining a sequence of
+ * display lists which contain glBitmap commands and putting the bitmap
+ * images into a texture map (the atlas).
+ * If we succeed, gl_bitmap_atlas::complete will be set to true.
+ * If we fail, gl_bitmap_atlas::incomplete will be set to true.
+ *
+ * On failure after the glyph array has been allocated, the glyph array and
+ * any texture object created so far are released and their pointers in the
+ * atlas are reset to NULL, so that a later _mesa_delete_bitmap_atlas() does
+ * not release them a second time.
+ *
+ * \param ctx       the rendering context
+ * \param atlas     the atlas to fill in; numBitmaps must be > 0 on entry and
+ *                  may be reduced if an empty/missing list is encountered
+ * \param listBase  ID of the first display list to examine
+ */
+static void
+build_bitmap_atlas(struct gl_context *ctx, struct gl_bitmap_atlas *atlas,
+                   GLuint listBase)
+{
+   unsigned i, row_height = 0, xpos = 0, ypos = 0;
+   /* Initialized so the "!map" test below is well defined even if the
+    * driver hook leaves the output untouched on failure.
+    */
+   GLubyte *map = NULL;
+   GLint map_stride = 0;
+
+   assert(atlas);
+   assert(!atlas->complete);
+   assert(atlas->numBitmaps > 0);
+
+   /* We use a rectangle texture (non-normalized coords) for the atlas */
+   assert(ctx->Extensions.NV_texture_rectangle);
+   assert(ctx->Const.MaxTextureRectSize >= 1024);
+
+   atlas->texWidth = 1024;
+   atlas->texHeight = 0;  /* determined below */
+
+   atlas->glyphs = malloc(atlas->numBitmaps * sizeof(atlas->glyphs[0]));
+   if (!atlas->glyphs) {
+      /* give up */
+      atlas->incomplete = true;
+      return;
+   }
+
+   /* Loop over the display lists.  They should all contain a single glBitmap
+    * call.  If not, bail out.  Also, compute the position and sizes of each
+    * bitmap in the atlas to determine the texture atlas size.
+    */
+   for (i = 0; i < atlas->numBitmaps; i++) {
+      const struct gl_display_list *list = _mesa_lookup_list(ctx, listBase + i);
+      const Node *n;
+      struct gl_bitmap_glyph *g = &atlas->glyphs[i];
+      unsigned bitmap_width, bitmap_height;
+      float bitmap_xmove, bitmap_ymove, bitmap_xorig, bitmap_yorig;
+
+      if (!list || is_empty_list(list)) {
+         /* stop here */
+         atlas->numBitmaps = i;
+         break;
+      }
+
+      if (!is_bitmap_list(list)) {
+         /* This list does not contain exactly one glBitmap command. Give up.
+          * Go through the common failure path so the glyph array is
+          * released like in every other failure case.
+          */
+         goto fail;
+      }
+
+      /* get bitmap info from the display list command */
+      n = list->Head;
+      assert(n[0].opcode == OPCODE_BITMAP);
+      bitmap_width = n[1].i;
+      bitmap_height = n[2].i;
+      bitmap_xorig = n[3].f;
+      bitmap_yorig = n[4].f;
+      bitmap_xmove = n[5].f;
+      bitmap_ymove = n[6].f;
+
+      if (bitmap_width > atlas->texWidth) {
+         /* A bitmap wider than the atlas cannot fit in any row.  Placing it
+          * at x=0 anyway would make the expansion below write past the end
+          * of the row (and possibly past the mapped image).  Give up.
+          */
+         goto fail;
+      }
+
+      if (xpos + bitmap_width > atlas->texWidth) {
+         /* advance to the next row of the texture */
+         xpos = 0;
+         ypos += row_height;
+         row_height = 0;
+      }
+
+      /* save the bitmap's position in the atlas */
+      g->x = xpos;
+      g->y = ypos;
+      g->w = bitmap_width;
+      g->h = bitmap_height;
+      g->xorig = bitmap_xorig;
+      g->yorig = bitmap_yorig;
+      g->xmove = bitmap_xmove;
+      g->ymove = bitmap_ymove;
+
+      xpos += bitmap_width;
+
+      /* keep track of tallest bitmap in the row */
+      row_height = MAX2(row_height, bitmap_height);
+   }
+
+   /* Now we know the texture height */
+   atlas->texHeight = ypos + row_height;
+
+   if (atlas->texHeight == 0) {
+      /* no glyphs found, give up */
+      goto fail;
+   }
+   else if (atlas->texHeight > ctx->Const.MaxTextureRectSize) {
+      /* too large, give up */
+      goto fail;
+   }
+
+   /* Create atlas texture (texture ID is irrelevant) */
+   atlas->texObj = ctx->Driver.NewTextureObject(ctx, 999, GL_TEXTURE_RECTANGLE);
+   if (!atlas->texObj) {
+      goto out_of_memory;
+   }
+
+   atlas->texObj->Sampler.MinFilter = GL_NEAREST;
+   atlas->texObj->Sampler.MagFilter = GL_NEAREST;
+   atlas->texObj->MaxLevel = 0;
+   atlas->texObj->Immutable = GL_TRUE;
+
+   atlas->texImage = _mesa_get_tex_image(ctx, atlas->texObj,
+                                         GL_TEXTURE_RECTANGLE, 0);
+   if (!atlas->texImage) {
+      goto out_of_memory;
+   }
+
+   _mesa_init_teximage_fields(ctx, atlas->texImage,
+                              atlas->texWidth, atlas->texHeight, 1, 0,
+                              GL_ALPHA, MESA_FORMAT_A_UNORM8);
+
+   /* alloc image storage */
+   if (!ctx->Driver.AllocTextureImageBuffer(ctx, atlas->texImage)) {
+      goto out_of_memory;
+   }
+
+   /* map teximage, load with bitmap glyphs */
+   ctx->Driver.MapTextureImage(ctx, atlas->texImage, 0,
+                               0, 0, atlas->texWidth, atlas->texHeight,
+                               GL_MAP_WRITE_BIT, &map, &map_stride);
+   if (!map) {
+      goto out_of_memory;
+   }
+
+   /* Background/clear pixels are 0xff, foreground/set pixels are 0x0 */
+   memset(map, 0xff, map_stride * atlas->texHeight);
+
+   /* Second pass: numBitmaps was truncated above at the first missing or
+    * empty list, so every list visited here was accepted by the first pass.
+    */
+   for (i = 0; i < atlas->numBitmaps; i++) {
+      const struct gl_display_list *list = _mesa_lookup_list(ctx, listBase + i);
+      const Node *n = list->Head;
+
+      assert(n[0].opcode == OPCODE_BITMAP ||
+             n[0].opcode == OPCODE_END_OF_LIST);
+
+      if (n[0].opcode == OPCODE_BITMAP) {
+         unsigned bitmap_width = n[1].i;
+         unsigned bitmap_height = n[2].i;
+         unsigned xpos = atlas->glyphs[i].x;
+         unsigned ypos = atlas->glyphs[i].y;
+         const void *bitmap_image = get_pointer(&n[7]);
+
+         assert(atlas->glyphs[i].w == bitmap_width);
+         assert(atlas->glyphs[i].h == bitmap_height);
+
+         /* put the bitmap image into the texture image */
+         _mesa_expand_bitmap(bitmap_width, bitmap_height,
+                             &ctx->DefaultPacking, bitmap_image,
+                             map + map_stride * ypos + xpos, /* dest addr */
+                             map_stride, 0x0);
+      }
+   }
+
+   ctx->Driver.UnmapTextureImage(ctx, atlas->texImage, 0);
+
+   atlas->complete = true;
+
+   return;
+
+out_of_memory:
+   _mesa_error(ctx, GL_OUT_OF_MEMORY, "Display list bitmap atlas");
+fail:
+   if (atlas->texObj) {
+      ctx->Driver.DeleteTexture(ctx, atlas->texObj);
+      /* Reset the pointers: the atlas stays in the hash table and
+       * _mesa_delete_bitmap_atlas() would otherwise delete the texture
+       * object a second time.  NOTE(review): texImage is presumably owned
+       * by texObj; it is only cleared here, never freed separately.
+       */
+      atlas->texObj = NULL;
+      atlas->texImage = NULL;
+   }
+   free(atlas->glyphs);
+   atlas->glyphs = NULL;
+   atlas->incomplete = true;
+}
+
+
+/**
* Allocate a gl_display_list object with an initial block of storage.
* \param count how many display list nodes/tokens to allocate
*/
@@ -856,6 +1114,30 @@ _mesa_delete_list(struct gl_context *ctx, struct gl_display_list *dlist)
/**
+ * Called by _mesa_HashWalk() to check if a display list which is being
+ * deleted belongs to a bitmap texture atlas.
+ *
+ * \param atlas_id  hash key of the atlas being visited; the range test
+ *                  below uses it as the ID of the first list in the atlas
+ * \param data  the struct gl_bitmap_atlas stored under atlas_id
+ * \param userData  points to the GLuint ID of the list being deleted
+ */
+static void
+check_atlas_for_deleted_list(GLuint atlas_id, void *data, void *userData)
+{
+ struct gl_bitmap_atlas *atlas = (struct gl_bitmap_atlas *) data;
+ GLuint list_id = *((GLuint *) userData); /* the list being deleted */
+
+ /* See if the list_id falls in the range contained in this texture atlas */
+ if (atlas->complete &&
+ list_id >= atlas_id &&
+ list_id < atlas_id + atlas->numBitmaps) {
+ /* Mark the atlas as incomplete so it doesn't get used. But don't
+ * delete it yet since we don't want to try to recreate it in the next
+ * glCallLists.
+ */
+ atlas->complete = false;
+ atlas->incomplete = true;
+ }
+}
+
+
+/**
* Destroy a display list and remove from hash table.
* \param list - display list number
*/
@@ -871,6 +1153,16 @@ destroy_list(struct gl_context *ctx, GLuint list)
if (!dlist)
return;
+ if (is_bitmap_list(dlist)) {
+ /* If we're destroying a simple glBitmap display list, there's a
+ * chance that we're destroying a bitmap image that's in a texture
+ * atlas. Examine all atlases to see if that's the case. There are
+ * usually few (if any) atlases so this isn't expensive.
+ */
+ _mesa_HashWalk(ctx->Shared->BitmapAtlas,
+ check_atlas_for_deleted_list, &list);
+ }
+
_mesa_delete_list(ctx, dlist);
_mesa_HashRemove(ctx->Shared->DisplayList, list);
}
@@ -8895,6 +9187,18 @@ _mesa_DeleteLists(GLuint list, GLsizei range)
_mesa_error(ctx, GL_INVALID_VALUE, "glDeleteLists");
return;
}
+
+ if (range > 1) {
+ /* We may be deleting a set of bitmap lists. See if there's a
+ * bitmap atlas to free.
+ */
+ struct gl_bitmap_atlas *atlas = lookup_bitmap_atlas(ctx, list);
+ if (atlas) {
+ _mesa_delete_bitmap_atlas(ctx, atlas);
+ _mesa_HashRemove(ctx->Shared->BitmapAtlas, list);
+ }
+ }
+
for (i = list; i < list + range; i++) {
destroy_list(ctx, i);
}
@@ -8936,6 +9240,24 @@ _mesa_GenLists(GLsizei range)
}
}
+ if (USE_BITMAP_ATLAS &&
+ range > 16 &&
+ ctx->Driver.DrawAtlasBitmaps) {
+ /* "range > 16" is a rough heuristic to guess when glGenLists might be
+ * used to allocate display lists for glXUseXFont or wglUseFontBitmaps.
+ * Create the empty atlas now.
+ */
+ struct gl_bitmap_atlas *atlas = lookup_bitmap_atlas(ctx, base);
+ if (!atlas) {
+ atlas = alloc_bitmap_atlas(ctx, base);
+ }
+ if (atlas) {
+ /* Atlas _should_ be new/empty now, but clobbering is OK */
+ assert(atlas->numBitmaps == 0);
+ atlas->numBitmaps = range;
+ }
+ }
+
mtx_unlock(&ctx->Shared->Mutex);
return base;
@@ -9085,6 +9407,65 @@ _mesa_CallList(GLuint list)
/**
+ * Try to execute a glCallLists() command where the display lists contain
+ * glBitmap commands with a texture atlas.
+ * The atlas is looked up under the current ctx->List.ListBase value and is
+ * allocated and built here if it does not exist yet.
+ * \param n  number of entries in \p lists
+ * \param type  data type of \p lists; only GL_UNSIGNED_BYTE is handled here
+ * \param lists  the glCallLists ID array; every entry must be less than
+ *               atlas->numBitmaps for the atlas path to be taken
+ * \return true for success, false otherwise.  When false is returned no
+ *         bitmaps have been drawn by this function.
+ */
+static bool
+render_bitmap_atlas(struct gl_context *ctx, GLsizei n, GLenum type,
+ const void *lists)
+{
+ struct gl_bitmap_atlas *atlas;
+ int i;
+
+ if (!USE_BITMAP_ATLAS ||
+ !ctx->Current.RasterPosValid ||
+ ctx->List.ListBase == 0 ||
+ type != GL_UNSIGNED_BYTE ||
+ !ctx->Driver.DrawAtlasBitmaps) {
+ /* unsupported */
+ return false;
+ }
+
+ atlas = lookup_bitmap_atlas(ctx, ctx->List.ListBase);
+
+ if (!atlas) {
+ /* Even if glGenLists wasn't called, we can still try to create
+ * the atlas now.
+ */
+ atlas = alloc_bitmap_atlas(ctx, ctx->List.ListBase);
+ }
+
+ if (atlas && !atlas->complete && !atlas->incomplete) { /* never tried yet */
+ /* Try to build the bitmap atlas now.
+ * If the atlas was created in glGenLists, we'll have recorded the
+ * number of lists (bitmaps). Otherwise, take a guess at 256.
+ */
+ if (atlas->numBitmaps == 0)
+ atlas->numBitmaps = 256;
+ build_bitmap_atlas(ctx, atlas, ctx->List.ListBase);
+ }
+
+ if (!atlas || !atlas->complete) { /* no atlas, or it is not usable */
+ return false;
+ }
+
+ /* check that all display list IDs are in the atlas */
+ for (i = 0; i < n; i++) {
+ const GLubyte *ids = (const GLubyte *) lists;
+
+ if (ids[i] >= atlas->numBitmaps) {
+ return false;
+ }
+ }
+
+ ctx->Driver.DrawAtlasBitmaps(ctx, atlas, n, (const GLubyte *) lists);
+
+ return true;
+}
+
+
+/**
* Execute glCallLists: call multiple display lists.
*/
void GLAPIENTRY
@@ -9123,6 +9504,10 @@ _mesa_CallLists(GLsizei n, GLenum type, const GLvoid * lists)
return;
}
+ if (render_bitmap_atlas(ctx, n, type, lists)) {
+ return;
+ }
+
/* Save the CompileFlag status, turn it off, execute display list,
* and restore the CompileFlag.
*/
diff --git a/src/mesa/main/dlist.h b/src/mesa/main/dlist.h
index 7a23208ba5a..22b696f50c1 100644
--- a/src/mesa/main/dlist.h
+++ b/src/mesa/main/dlist.h
@@ -36,6 +36,44 @@
#include "main/mtypes.h"
+/**
+ * Describes the location and size of a glBitmap image in a texture atlas.
+ * gl_bitmap_atlas::glyphs is an array of these, indexed by the display
+ * list's position within the atlas.
+ */
+struct gl_bitmap_glyph
+{
+ unsigned short x, y, w, h; /**< position and size in the texture */
+ float xorig, yorig; /**< bitmap origin */
+ float xmove, ymove; /**< rasterpos move */
+};
+
+
+/**
+ * Describes a set of glBitmap display lists which live in a texture atlas.
+ * The idea is when we see a code sequence of glListBase(b), glCallLists(n)
+ * we're probably drawing bitmap font glyphs. We try to put all the bitmap
+ * glyphs into one texture map then render the glCallLists as a textured
+ * quadstrip.
+ */
+struct gl_bitmap_atlas
+{
+ bool complete; /**< Is the atlas ready to use? */
+ bool incomplete; /**< Did we fail to construct this atlas? */
+
+ unsigned numBitmaps; /**< number of display lists (bitmaps) covered */
+ unsigned texWidth, texHeight; /**< size of the atlas texture image */
+ struct gl_texture_object *texObj; /**< texture object holding the atlas */
+ struct gl_texture_image *texImage; /**< level 0 image of texObj */
+
+ unsigned glyphHeight;
+
+ struct gl_bitmap_glyph *glyphs; /**< per-bitmap info, indexed by bitmap */
+};
+
+void
+_mesa_delete_bitmap_atlas(struct gl_context *ctx,
+ struct gl_bitmap_atlas *atlas);
+
+
GLboolean GLAPIENTRY
_mesa_IsList(GLuint list);
diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c
index 3fd3c2747ea..3985457f21a 100644
--- a/src/mesa/main/enable.c
+++ b/src/mesa/main/enable.c
@@ -749,7 +749,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state)
return;
/* GL_ARB_texture_cube_map */
- case GL_TEXTURE_CUBE_MAP_ARB:
+ case GL_TEXTURE_CUBE_MAP:
if (ctx->API != API_OPENGL_COMPAT && ctx->API != API_OPENGLES)
goto invalid_enum_error;
CHECK_EXTENSION(ARB_texture_cube_map, cap);
@@ -1450,7 +1450,7 @@ _mesa_IsEnabled( GLenum cap )
return ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_POINT_SIZE].Enabled;
/* GL_ARB_texture_cube_map */
- case GL_TEXTURE_CUBE_MAP_ARB:
+ case GL_TEXTURE_CUBE_MAP:
CHECK_EXTENSION(ARB_texture_cube_map);
return is_texture_enabled(ctx, TEXTURE_CUBE_BIT);
diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index d1e3a99fdc0..68f36178f32 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -224,6 +224,7 @@ EXT(EXT_subtexture , dummy_true
EXT(EXT_texture , dummy_true , GLL, x , x , x , 1996)
EXT(EXT_texture3D , dummy_true , GLL, x , x , x , 1996)
EXT(EXT_texture_array , EXT_texture_array , GLL, GLC, x , x , 2006)
+EXT(EXT_texture_border_clamp , ARB_texture_border_clamp , x , x , x , ES2, 2014)
EXT(EXT_texture_compression_dxt1 , ANGLE_texture_compression_dxt , GLL, GLC, ES1, ES2, 2004)
EXT(EXT_texture_compression_latc , EXT_texture_compression_latc , GLL, x , x , x , 2006)
EXT(EXT_texture_compression_rgtc , ARB_texture_compression_rgtc , GLL, GLC, x , x , 2004)
@@ -325,6 +326,7 @@ EXT(OES_point_sprite , ARB_point_sprite
EXT(OES_query_matrix , dummy_true , x , x , ES1, x , 2003)
EXT(OES_read_format , dummy_true , GLL, GLC, ES1, x , 2003)
EXT(OES_rgb8_rgba8 , dummy_true , x , x , ES1, ES2, 2005)
+EXT(OES_shader_image_atomic , ARB_shader_image_load_store , x , x , x , 31, 2015)
EXT(OES_single_precision , dummy_true , x , x , ES1, x , 2003)
EXT(OES_standard_derivatives , OES_standard_derivatives , x , x , x , ES2, 2005)
EXT(OES_stencil1 , dummy_false , x , x , x , x , 2005)
@@ -333,6 +335,7 @@ EXT(OES_stencil8 , dummy_true
EXT(OES_stencil_wrap , dummy_true , x , x , ES1, x , 2002)
EXT(OES_surfaceless_context , dummy_true , x , x , ES1, ES2, 2012)
EXT(OES_texture_3D , dummy_true , x , x , x , ES2, 2005)
+EXT(OES_texture_border_clamp , ARB_texture_border_clamp , x , x , x , ES2, 2014)
EXT(OES_texture_cube_map , ARB_texture_cube_map , x , x , ES1, x , 2007)
EXT(OES_texture_env_crossbar , ARB_texture_env_crossbar , x , x , ES1, x , 2005)
EXT(OES_texture_float , OES_texture_float , x , x , x , ES2, 2005)
@@ -341,6 +344,7 @@ EXT(OES_texture_half_float , OES_texture_half_float
EXT(OES_texture_half_float_linear , OES_texture_half_float_linear , x , x , x , ES2, 2005)
EXT(OES_texture_mirrored_repeat , dummy_true , x , x , ES1, x , 2005)
EXT(OES_texture_npot , ARB_texture_non_power_of_two , x , x , ES1, ES2, 2005)
+EXT(OES_texture_stencil8 , ARB_texture_stencil8 , x , x , x , 30, 2014)
EXT(OES_texture_storage_multisample_2d_array, ARB_texture_multisample , x , x , ES1, 31, 2014)
EXT(OES_vertex_array_object , dummy_true , x , x , ES1, ES2, 2010)
diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 1b9b692f001..1f10050c891 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -811,7 +811,7 @@ test_attachment_completeness(const struct gl_context *ctx, GLenum format,
break;
}
- baseFormat = _mesa_get_format_base_format(texImage->TexFormat);
+ baseFormat = texImage->_BaseFormat;
if (format == GL_COLOR) {
if (!_mesa_is_legal_color_format(ctx, baseFormat)) {
@@ -868,8 +868,7 @@ test_attachment_completeness(const struct gl_context *ctx, GLenum format,
}
}
else if (att->Type == GL_RENDERBUFFER_EXT) {
- const GLenum baseFormat =
- _mesa_get_format_base_format(att->Renderbuffer->Format);
+ const GLenum baseFormat = att->Renderbuffer->_BaseFormat;
assert(att->Renderbuffer);
if (!att->Renderbuffer->InternalFormat ||
diff --git a/src/mesa/main/format_utils.c b/src/mesa/main/format_utils.c
index 5fdabd5b97f..d16d69c3795 100644
--- a/src/mesa/main/format_utils.c
+++ b/src/mesa/main/format_utils.c
@@ -179,6 +179,63 @@ _mesa_compute_rgba2base2rgba_component_mapping(GLenum baseFormat, uint8_t *map)
}
}
+
+/**
+ * Special case conversion function to swap r/b channels from the source
+ * image to the dest image.
+ *
+ * Each pixel is handled as one 32-bit word: bits 8-15 and 24-31 are copied
+ * unchanged and bits 0-7 are exchanged with bits 16-23.
+ *
+ * \param width  row length in pixels
+ * \param height  number of rows
+ * \param src  first source row
+ * \param src_stride  byte distance between consecutive source rows
+ * \param dst  first destination row
+ * \param dst_stride  byte distance between consecutive destination rows
+ *
+ * NOTE(review): which channels live in bits 0-7 / 16-23 depends on host
+ * byte order; "r/b" matches a little-endian host -- confirm big-endian use.
+ */
+static void
+convert_ubyte_rgba_to_bgra(size_t width, size_t height,
+ const uint8_t *src, size_t src_stride,
+ uint8_t *dst, size_t dst_stride)
+{
+ int row;
+
+ if (sizeof(void *) == 8 &&
+ src_stride % 8 == 0 &&
+ dst_stride % 8 == 0 &&
+ (GLsizeiptr) src % 8 == 0 &&
+ (GLsizeiptr) dst % 8 == 0) {
+ /* use 64-bit word to swizzle two 32-bit pixels. We need 8-byte
+ * alignment for src/dst addresses and strides.
+ */
+ for (row = 0; row < height; row++) {
+ const GLuint64 *s = (const GLuint64 *) src;
+ GLuint64 *d = (GLuint64 *) dst;
+ int i;
+ for (i = 0; i < width/2; i++) { /* two pixels per 64-bit word */
+ d[i] = ( (s[i] & 0xff00ff00ff00ff00) |
+ ((s[i] & 0xff000000ff) << 16) |
+ ((s[i] & 0xff000000ff0000) >> 16));
+ }
+ if (width & 1) {
+ /* handle the case of odd widths */
+ const GLuint s = ((const GLuint *) src)[width - 1];
+ GLuint *d = (GLuint *) dst + width - 1;
+ *d = ( (s & 0xff00ff00) |
+ ((s & 0xff) << 16) |
+ ((s & 0xff0000) >> 16));
+ }
+ src += src_stride;
+ dst += dst_stride;
+ }
+ } else { /* generic path: one 32-bit pixel at a time */
+ for (row = 0; row < height; row++) {
+ const GLuint *s = (const GLuint *) src;
+ GLuint *d = (GLuint *) dst;
+ int i;
+ for (i = 0; i < width; i++) {
+ d[i] = ( (s[i] & 0xff00ff00) |
+ ((s[i] & 0xff) << 16) |
+ ((s[i] & 0xff0000) >> 16));
+ }
+ src += src_stride;
+ dst += dst_stride;
+ }
+ }
+}
+
+
/**
* This can be used to convert between most color formats.
*
@@ -299,11 +356,18 @@ _mesa_format_convert(void *void_dst, uint32_t dst_format, size_t dst_stride,
return;
} else if (src_array_format == RGBA8_UBYTE) {
assert(!_mesa_is_format_integer_color(dst_format));
- for (row = 0; row < height; ++row) {
- _mesa_pack_ubyte_rgba_row(dst_format, width,
- (const uint8_t (*)[4])src, dst);
- src += src_stride;
- dst += dst_stride;
+
+ if (dst_format == MESA_FORMAT_B8G8R8A8_UNORM) {
+ convert_ubyte_rgba_to_bgra(width, height, src, src_stride,
+ dst, dst_stride);
+ }
+ else {
+ for (row = 0; row < height; ++row) {
+ _mesa_pack_ubyte_rgba_row(dst_format, width,
+ (const uint8_t (*)[4])src, dst);
+ src += src_stride;
+ dst += dst_stride;
+ }
}
return;
} else if (src_array_format == RGBA32_UINT &&
diff --git a/src/mesa/main/formatquery.c b/src/mesa/main/formatquery.c
index 85f7b6b5664..816f12bf9e2 100644
--- a/src/mesa/main/formatquery.c
+++ b/src/mesa/main/formatquery.c
@@ -131,11 +131,14 @@ _mesa_GetInternalformativ(GLenum target, GLenum internalformat, GLenum pname,
internalformat, buffer);
break;
case GL_NUM_SAMPLE_COUNTS: {
- if (_mesa_is_gles3(ctx) && _mesa_is_enum_format_integer(internalformat)) {
+ if ((ctx->API == API_OPENGLES2 && ctx->Version == 30) &&
+ _mesa_is_enum_format_integer(internalformat)) {
/* From GL ES 3.0 specification, section 6.1.15 page 236: "Since
* multisampling is not supported for signed and unsigned integer
* internal formats, the value of NUM_SAMPLE_COUNTS will be zero
* for such formats.
+ *
+ * Such a restriction no longer exists in GL ES 3.1.
*/
buffer[0] = 0;
count = 1;
diff --git a/src/mesa/main/genmipmap.c b/src/mesa/main/genmipmap.c
index 4ec8385ec2f..6c2d31dbcf3 100644
--- a/src/mesa/main/genmipmap.c
+++ b/src/mesa/main/genmipmap.c
@@ -123,7 +123,7 @@ _mesa_generate_texture_mipmap(struct gl_context *ctx,
GLuint face;
for (face = 0; face < 6; face++) {
ctx->Driver.GenerateMipmap(ctx,
- GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB + face, texObj);
+ GL_TEXTURE_CUBE_MAP_POSITIVE_X + face, texObj);
}
}
else {
diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index 9005dc5897d..f40c5705813 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -642,7 +642,7 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu
case GL_TEXTURE_1D:
case GL_TEXTURE_2D:
case GL_TEXTURE_3D:
- case GL_TEXTURE_CUBE_MAP_ARB:
+ case GL_TEXTURE_CUBE_MAP:
case GL_TEXTURE_RECTANGLE_NV:
case GL_TEXTURE_EXTERNAL_OES:
v->value_bool = _mesa_IsEnabled(d->pname);
diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py
index 164095c103c..07d2d20df7a 100644
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get_hash_params.py
@@ -455,13 +455,13 @@ descriptor=[
# GL_ARB_compute_shader / GLES 3.1
[ "MAX_COMPUTE_WORK_GROUP_INVOCATIONS", "CONTEXT_INT(Const.MaxComputeWorkGroupInvocations), extra_ARB_compute_shader_es31" ],
- [ "MAX_COMPUTE_UNIFORM_BLOCKS", "CONST(MAX_COMPUTE_UNIFORM_BLOCKS), extra_ARB_compute_shader_es31" ],
- [ "MAX_COMPUTE_TEXTURE_IMAGE_UNITS", "CONST(MAX_COMPUTE_TEXTURE_IMAGE_UNITS), extra_ARB_compute_shader_es31" ],
- [ "MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS", "CONST(MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS), extra_ARB_compute_shader_es31" ],
- [ "MAX_COMPUTE_ATOMIC_COUNTERS", "CONST(MAX_COMPUTE_ATOMIC_COUNTERS), extra_ARB_compute_shader_es31" ],
- [ "MAX_COMPUTE_SHARED_MEMORY_SIZE", "CONST(MAX_COMPUTE_SHARED_MEMORY_SIZE), extra_ARB_compute_shader_es31" ],
- [ "MAX_COMPUTE_UNIFORM_COMPONENTS", "CONST(MAX_COMPUTE_UNIFORM_COMPONENTS), extra_ARB_compute_shader_es31" ],
- [ "MAX_COMPUTE_IMAGE_UNIFORMS", "CONST(MAX_COMPUTE_IMAGE_UNIFORMS), extra_ARB_compute_shader_es31" ],
+ [ "MAX_COMPUTE_UNIFORM_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxUniformBlocks), extra_ARB_compute_shader_es31" ],
+ [ "MAX_COMPUTE_TEXTURE_IMAGE_UNITS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits), extra_ARB_compute_shader_es31" ],
+ [ "MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers), extra_ARB_compute_shader_es31" ],
+ [ "MAX_COMPUTE_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxAtomicCounters), extra_ARB_compute_shader_es31" ],
+ [ "MAX_COMPUTE_SHARED_MEMORY_SIZE", "CONTEXT_INT(Const.MaxComputeSharedMemorySize), extra_ARB_compute_shader_es31" ],
+ [ "MAX_COMPUTE_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxUniformComponents), extra_ARB_compute_shader_es31" ],
+ [ "MAX_COMPUTE_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxImageUniforms), extra_ARB_compute_shader_es31" ],
[ "DISPATCH_INDIRECT_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_compute_shader_es31" ],
[ "MAX_COMBINED_COMPUTE_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxCombinedUniformComponents), extra_ARB_compute_shader_es31" ],
diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c
index f5284447b6e..987cd0db45c 100644
--- a/src/mesa/main/glformats.c
+++ b/src/mesa/main/glformats.c
@@ -3153,6 +3153,14 @@ _mesa_es3_error_check_format_and_type(const struct gl_context *ctx,
}
break;
+ case GL_STENCIL_INDEX:
+ if (!_mesa_has_OES_texture_stencil8(ctx) ||
+ type != GL_UNSIGNED_BYTE ||
+ internalFormat != GL_STENCIL_INDEX8) {
+ return GL_INVALID_OPERATION;
+ }
+ break;
+
case GL_ALPHA:
case GL_LUMINANCE:
case GL_LUMINANCE_ALPHA:
diff --git a/src/mesa/main/image.c b/src/mesa/main/image.c
index 99f253cd373..4d6ab6f2b56 100644
--- a/src/mesa/main/image.c
+++ b/src/mesa/main/image.c
@@ -408,9 +408,7 @@ _mesa_expand_bitmap(GLsizei width, GLsizei height,
const GLint srcStride = _mesa_image_row_stride(unpack, width,
GL_COLOR_INDEX, GL_BITMAP);
GLint row, col;
-
-#define SET_PIXEL(COL, ROW) \
- destBuffer[(ROW) * destStride + (COL)] = onValue;
+ GLubyte *dstRow = destBuffer;
for (row = 0; row < height; row++) {
const GLubyte *src = srcRow;
@@ -421,7 +419,7 @@ _mesa_expand_bitmap(GLsizei width, GLsizei height,
for (col = 0; col < width; col++) {
if (*src & mask) {
- SET_PIXEL(col, row);
+ dstRow[col] = onValue;
}
if (mask == 128U) {
@@ -443,7 +441,7 @@ _mesa_expand_bitmap(GLsizei width, GLsizei height,
for (col = 0; col < width; col++) {
if (*src & mask) {
- SET_PIXEL(col, row);
+ dstRow[col] = onValue;
}
if (mask == 1U) {
@@ -461,9 +459,8 @@ _mesa_expand_bitmap(GLsizei width, GLsizei height,
}
srcRow += srcStride;
+ dstRow += destStride;
} /* row */
-
-#undef SET_PIXEL
}
diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c
index 230ebbc67f4..14cd58870f7 100644
--- a/src/mesa/main/imports.c
+++ b/src/mesa/main/imports.c
@@ -89,7 +89,7 @@ _mesa_align_malloc(size_t bytes, unsigned long alignment)
if (err)
return NULL;
return mem;
-#elif defined(_WIN32) && defined(_MSC_VER)
+#elif defined(_WIN32)
return _aligned_malloc(bytes, alignment);
#else
uintptr_t ptr, buf;
@@ -131,7 +131,7 @@ _mesa_align_calloc(size_t bytes, unsigned long alignment)
}
return mem;
-#elif defined(_WIN32) && defined(_MSC_VER)
+#elif defined(_WIN32)
void *mem;
mem = _aligned_malloc(bytes, alignment);
@@ -178,7 +178,7 @@ _mesa_align_free(void *ptr)
{
#if defined(HAVE_POSIX_MEMALIGN)
free(ptr);
-#elif defined(_WIN32) && defined(_MSC_VER)
+#elif defined(_WIN32)
_aligned_free(ptr);
#else
if (ptr) {
@@ -196,7 +196,7 @@ void *
_mesa_align_realloc(void *oldBuffer, size_t oldSize, size_t newSize,
unsigned long alignment)
{
-#if defined(_WIN32) && defined(_MSC_VER)
+#if defined(_WIN32)
(void) oldSize;
return _aligned_realloc(oldBuffer, newSize, alignment);
#else
diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h
index ad7af5c1d8c..d96d666e15f 100644
--- a/src/mesa/main/imports.h
+++ b/src/mesa/main/imports.h
@@ -83,9 +83,6 @@ typedef union { GLfloat f; GLint i; GLuint u; } fi_type;
#if defined(_MSC_VER)
-#if _MSC_VER < 1800 /* Not req'd on VS2013 and above */
-#define strtoll(p, e, b) _strtoi64(p, e, b)
-#endif /* _MSC_VER < 1800 */
#define strcasecmp(s1, s2) _stricmp(s1, s2)
#endif
/*@}*/
diff --git a/src/mesa/main/mipmap.c b/src/mesa/main/mipmap.c
index 50469956c6e..5a02780b960 100644
--- a/src/mesa/main/mipmap.c
+++ b/src/mesa/main/mipmap.c
@@ -1715,12 +1715,12 @@ _mesa_generate_mipmap_level(GLenum target,
dstWidth, dstData[0]);
break;
case GL_TEXTURE_2D:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
make_2d_mipmap(datatype, comps, border,
srcWidth, srcHeight, srcData[0], srcRowStride,
dstWidth, dstHeight, dstData[0], dstRowStride);
@@ -1838,12 +1838,7 @@ _mesa_prepare_mipmap_level(struct gl_context *ctx,
for (face = 0; face < numFaces; face++) {
struct gl_texture_image *dstImage;
- GLenum target;
-
- if (numFaces == 1)
- target = texObj->Target;
- else
- target = GL_TEXTURE_CUBE_MAP_POSITIVE_X + face;
+ const GLenum target = _mesa_cube_face_target(texObj->Target, face);
dstImage = _mesa_get_tex_image(ctx, texObj, target, level);
if (!dstImage) {
@@ -2024,7 +2019,7 @@ generate_mipmap_compressed(struct gl_context *ctx, GLenum target,
/* only two types of compressed textures at this time */
assert(texObj->Target == GL_TEXTURE_2D ||
texObj->Target == GL_TEXTURE_2D_ARRAY ||
- texObj->Target == GL_TEXTURE_CUBE_MAP_ARB ||
+ texObj->Target == GL_TEXTURE_CUBE_MAP ||
texObj->Target == GL_TEXTURE_CUBE_MAP_ARRAY);
/*
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index a66b56c62bf..2ca9cbf808a 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -44,6 +44,7 @@
#include "math/m_matrix.h" /* GLmatrix */
#include "compiler/shader_enums.h"
#include "main/formats.h" /* MESA_FORMAT_COUNT */
+#include "compiler/glsl/list.h"
#ifdef __cplusplus
@@ -1872,6 +1873,8 @@ typedef enum
PROGRAM_UNDEFINED, /**< Invalid/TBD value */
PROGRAM_IMMEDIATE, /**< Immediate value, used by TGSI */
PROGRAM_BUFFER, /**< for shader buffers, compile-time only */
+ PROGRAM_MEMORY, /**< for shared, global and local memory */
+ PROGRAM_IMAGE, /**< for shader images, compile-time only */
PROGRAM_FILE_MAX
} gl_register_file;
@@ -2044,6 +2047,11 @@ struct gl_compute_program
* Size specified using local_size_{x,y,z}.
*/
unsigned LocalSize[3];
+
+ /**
+ * Size of shared variables accessed by the compute shader.
+ */
+ unsigned SharedSize;
};
@@ -2769,6 +2777,13 @@ struct gl_shader_program
struct gl_uniform_storage **UniformRemapTable;
/**
+ * Sometimes there are empty slots left over in UniformRemapTable after we
+ * allocate slots to explicit locations. This list stores the blocks of
+ * continuous empty slots inside UniformRemapTable.
+ */
+ struct exec_list EmptyUniformLocations;
+
+ /**
* Size of the gl_ClipDistance array that is output from the last pipeline
* stage before the fragment shader.
*/
@@ -3044,6 +3059,7 @@ struct gl_shared_state
mtx_t Mutex; /**< for thread safety */
GLint RefCount; /**< Reference count */
struct _mesa_HashTable *DisplayList; /**< Display lists hash table */
+ struct _mesa_HashTable *BitmapAtlas; /**< For optimized glBitmap text */
struct _mesa_HashTable *TexObjects; /**< Texture objects hash table */
/** Default texture objects (shared by all texture units) */
@@ -3727,6 +3743,7 @@ struct gl_constants
GLuint MaxComputeWorkGroupCount[3]; /* Array of x, y, z dimensions */
GLuint MaxComputeWorkGroupSize[3]; /* Array of x, y, z dimensions */
GLuint MaxComputeWorkGroupInvocations;
+ GLuint MaxComputeSharedMemorySize;
/** GL_ARB_gpu_shader5 */
GLfloat MinFragmentInterpolationOffset;
diff --git a/src/mesa/main/pipelineobj.c b/src/mesa/main/pipelineobj.c
index 0e4a0af0b0f..af17be2a3f4 100644
--- a/src/mesa/main/pipelineobj.c
+++ b/src/mesa/main/pipelineobj.c
@@ -964,8 +964,5 @@ _mesa_GetProgramPipelineInfoLog(GLuint pipeline, GLsizei bufSize,
return;
}
- if (pipe->InfoLog)
- _mesa_copy_string(infoLog, bufSize, length, pipe->InfoLog);
- else
- *length = 0;
+ _mesa_copy_string(infoLog, bufSize, length, pipe->InfoLog);
}
diff --git a/src/mesa/main/program_resource.c b/src/mesa/main/program_resource.c
index 50659b0cd21..0d9f8aecf08 100644
--- a/src/mesa/main/program_resource.c
+++ b/src/mesa/main/program_resource.c
@@ -357,10 +357,6 @@ _mesa_GetProgramResourceiv(GLuint program, GLenum programInterface,
return;
}
- /* No need to write any properties, user requested none. */
- if (bufSize == 0)
- return;
-
_mesa_get_program_resourceiv(shProg, programInterface, index,
propCount, props, bufSize, length, params);
}
diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c
index 470182ab23d..882d863c1c7 100644
--- a/src/mesa/main/readpix.c
+++ b/src/mesa/main/readpix.c
@@ -582,7 +582,7 @@ read_rgba_pixels( struct gl_context *ctx,
void *luminance;
uint32_t luminance_format;
- luminance_stride = width * sizeof(GL_FLOAT);
+ luminance_stride = width * sizeof(GLfloat);
if (format == GL_LUMINANCE_ALPHA)
luminance_stride *= 2;
luminance_bytes = height * luminance_stride;
diff --git a/src/mesa/main/samplerobj.c b/src/mesa/main/samplerobj.c
index fe15508696e..ca366d967ab 100644
--- a/src/mesa/main/samplerobj.c
+++ b/src/mesa/main/samplerobj.c
@@ -1518,7 +1518,8 @@ _mesa_GetSamplerParameterIiv(GLuint sampler, GLenum pname, GLint *params)
sampObj = _mesa_lookup_samplerobj(ctx, sampler);
if (!sampObj) {
- _mesa_error(ctx, GL_INVALID_VALUE,
+ _mesa_error(ctx, (_mesa_is_gles(ctx) ?
+ GL_INVALID_OPERATION : GL_INVALID_VALUE),
"glGetSamplerParameterIiv(sampler %u)",
sampler);
return;
@@ -1593,7 +1594,8 @@ _mesa_GetSamplerParameterIuiv(GLuint sampler, GLenum pname, GLuint *params)
sampObj = _mesa_lookup_samplerobj(ctx, sampler);
if (!sampObj) {
- _mesa_error(ctx, GL_INVALID_VALUE,
+ _mesa_error(ctx, (_mesa_is_gles(ctx) ?
+ GL_INVALID_OPERATION : GL_INVALID_VALUE),
"glGetSamplerParameterIuiv(sampler %u)",
sampler);
return;
diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index df92c0912af..cdf15b48a0d 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -2124,6 +2124,7 @@ _mesa_copy_linked_program_data(gl_shader_stage type,
int i;
for (i = 0; i < 3; i++)
dst_cp->LocalSize[i] = src->Comp.LocalSize[i];
+ dst_cp->SharedSize = src->Comp.SharedSize;
break;
}
default:
diff --git a/src/mesa/main/shared.c b/src/mesa/main/shared.c
index b9f7bb65fb6..49e5f028045 100644
--- a/src/mesa/main/shared.c
+++ b/src/mesa/main/shared.c
@@ -65,6 +65,7 @@ _mesa_alloc_shared_state(struct gl_context *ctx)
mtx_init(&shared->Mutex, mtx_plain);
shared->DisplayList = _mesa_NewHashTable();
+ shared->BitmapAtlas = _mesa_NewHashTable();
shared->TexObjects = _mesa_NewHashTable();
shared->Programs = _mesa_NewHashTable();
@@ -144,6 +145,18 @@ delete_displaylist_cb(GLuint id, void *data, void *userData)
/**
+ * Callback for deleting a bitmap atlas. Called by _mesa_HashDeleteAll().
+ *
+ * \param id  hash key of the atlas (not used here)
+ * \param data  the struct gl_bitmap_atlas to delete
+ * \param userData  the struct gl_context handed to _mesa_HashDeleteAll()
+ */
+static void
+delete_bitmap_atlas_cb(GLuint id, void *data, void *userData)
+{
+ struct gl_bitmap_atlas *atlas = (struct gl_bitmap_atlas *) data;
+ struct gl_context *ctx = (struct gl_context *) userData;
+ _mesa_delete_bitmap_atlas(ctx, atlas);
+}
+
+
+/**
* Callback for deleting a texture object. Called by _mesa_HashDeleteAll().
*/
static void
@@ -309,6 +322,8 @@ free_shared_state(struct gl_context *ctx, struct gl_shared_state *shared)
*/
_mesa_HashDeleteAll(shared->DisplayList, delete_displaylist_cb, ctx);
_mesa_DeleteHashTable(shared->DisplayList);
+ _mesa_HashDeleteAll(shared->BitmapAtlas, delete_bitmap_atlas_cb, ctx);
+ _mesa_DeleteHashTable(shared->BitmapAtlas);
_mesa_HashWalk(shared->ShaderObjects, free_shader_program_data_cb, ctx);
_mesa_HashDeleteAll(shared->ShaderObjects, delete_shader_cb, ctx);
diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp
index e6412962251..24e3d189091 100644
--- a/src/mesa/main/tests/dispatch_sanity.cpp
+++ b/src/mesa/main/tests/dispatch_sanity.cpp
@@ -2436,6 +2436,16 @@ const struct function gles3_functions_possible[] = {
{ "glGetFragDataIndexEXT", 30, -1 },
{ "glBindFragDataLocationEXT", 30, -1 },
+ /* GL_OES_texture_border_clamp */
+ { "glTexParameterIivOES", 30, -1 },
+ { "glTexParameterIuivOES", 30, -1 },
+ { "glGetTexParameterIivOES", 30, -1 },
+ { "glGetTexParameterIuivOES", 30, -1 },
+ { "glSamplerParameterIivOES", 30, -1 },
+ { "glSamplerParameterIuivOES", 30, -1 },
+ { "glGetSamplerParameterIivOES", 30, -1 },
+ { "glGetSamplerParameterIuivOES", 30, -1 },
+
{ NULL, 0, -1 }
};
diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c
index b273aaac2a1..06bc8f1ba15 100644
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -869,12 +869,12 @@ legal_getteximage_target(struct gl_context *ctx, GLenum target, bool dsa)
* the targets from table 8.19 (for GetTexImage and GetnTexImage *only*),
* or TEXTURE_CUBE_MAP (for GetTextureImage *only*)." (Emphasis added.)
*/
- case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
return dsa ? GL_FALSE : ctx->Extensions.ARB_texture_cube_map;
case GL_TEXTURE_CUBE_MAP:
return dsa ? GL_TRUE : GL_FALSE;
@@ -886,7 +886,7 @@ legal_getteximage_target(struct gl_context *ctx, GLenum target, bool dsa)
/**
* Wrapper for _mesa_select_tex_image() which can handle target being
- * GL_TEXTURE_CUBE_MAP_ARB in which case we use zoffset to select a cube face.
+ * GL_TEXTURE_CUBE_MAP in which case we use zoffset to select a cube face.
* This can happen for glGetTextureImage and glGetTextureSubImage (DSA
* functions).
*/
diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 50141be8693..8a4c6286cbe 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -116,20 +116,6 @@ adjust_for_oes_float_texture(GLenum format, GLenum type)
return format;
}
-/**
- * For cube map faces, return a face index in [0,5].
- * For other targets return 0;
- */
-GLuint
-_mesa_tex_target_to_face(GLenum target)
-{
- if (_mesa_is_cube_face(target))
- return (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
- else
- return 0;
-}
-
-
/**
* Install gl_texture_image in a gl_texture_object according to the target
@@ -273,15 +259,15 @@ proxy_target(GLenum target)
case GL_TEXTURE_3D:
case GL_PROXY_TEXTURE_3D:
return GL_PROXY_TEXTURE_3D;
- case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
- case GL_TEXTURE_CUBE_MAP_ARB:
- case GL_PROXY_TEXTURE_CUBE_MAP_ARB:
- return GL_PROXY_TEXTURE_CUBE_MAP_ARB;
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+ case GL_TEXTURE_CUBE_MAP:
+ case GL_PROXY_TEXTURE_CUBE_MAP:
+ return GL_PROXY_TEXTURE_CUBE_MAP;
case GL_TEXTURE_RECTANGLE_NV:
case GL_PROXY_TEXTURE_RECTANGLE_NV:
return GL_PROXY_TEXTURE_RECTANGLE_NV;
@@ -472,13 +458,13 @@ _mesa_max_texture_levels(struct gl_context *ctx, GLenum target)
case GL_PROXY_TEXTURE_3D:
return ctx->Const.Max3DTextureLevels;
case GL_TEXTURE_CUBE_MAP:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
- case GL_PROXY_TEXTURE_CUBE_MAP_ARB:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+ case GL_PROXY_TEXTURE_CUBE_MAP:
return ctx->Extensions.ARB_texture_cube_map
? ctx->Const.MaxCubeTextureLevels : 0;
case GL_TEXTURE_RECTANGLE_NV:
@@ -1016,7 +1002,7 @@ _mesa_legal_texture_dimensions(struct gl_context *ctx, GLenum target,
case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
- case GL_PROXY_TEXTURE_CUBE_MAP_ARB:
+ case GL_PROXY_TEXTURE_CUBE_MAP:
maxSize = 1 << (ctx->Const.MaxCubeTextureLevels - 1);
maxSize >>= level;
if (width != height)
@@ -2299,8 +2285,10 @@ copytexture_error_check( struct gl_context *ctx, GLuint dimensions,
}
if (baseFormat == GL_DEPTH_COMPONENT ||
baseFormat == GL_DEPTH_STENCIL ||
+ baseFormat == GL_STENCIL_INDEX ||
rb_base_format == GL_DEPTH_COMPONENT ||
rb_base_format == GL_DEPTH_STENCIL ||
+ rb_base_format == GL_STENCIL_INDEX ||
((baseFormat == GL_LUMINANCE_ALPHA ||
baseFormat == GL_ALPHA) &&
rb_base_format != GL_RGBA) ||
diff --git a/src/mesa/main/teximage.h b/src/mesa/main/teximage.h
index 5df36c59a28..17f2c908ecc 100644
--- a/src/mesa/main/teximage.h
+++ b/src/mesa/main/teximage.h
@@ -43,10 +43,63 @@ extern "C" {
static inline GLboolean
_mesa_is_cube_face(GLenum target)
{
- return (target >= GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB &&
- target <= GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB);
+ return (target >= GL_TEXTURE_CUBE_MAP_POSITIVE_X &&
+ target <= GL_TEXTURE_CUBE_MAP_NEGATIVE_Z);
}
+
+/**
+ * Number of faces for a texture target: 6 for GL_TEXTURE_CUBE_MAP and
+ * GL_PROXY_TEXTURE_CUBE_MAP, 1 for every other target (including the
+ * individual cube face targets).
+ * NOTE: this function is not used for cube map arrays which operate
+ * more like 2D arrays than cube maps.
+ */
+static inline GLuint
+_mesa_num_tex_faces(GLenum target)
+{
+   if (target == GL_TEXTURE_CUBE_MAP ||
+       target == GL_PROXY_TEXTURE_CUBE_MAP) {
+      return 6;
+   }
+
+   return 1;
+}
+
+
+/**
+ * Resolve the per-face target for one face of a cube map.
+ *
+ * \param target  texture target; only GL_TEXTURE_CUBE_MAP is translated
+ * \param face    face index; asserted to be < 6 for GL_TEXTURE_CUBE_MAP
+ * \return GL_TEXTURE_CUBE_MAP_POSITIVE_X + face for GL_TEXTURE_CUBE_MAP,
+ *         otherwise \p target as-is.
+ */
+static inline GLenum
+_mesa_cube_face_target(GLenum target, unsigned face)
+{
+   if (target != GL_TEXTURE_CUBE_MAP)
+      return target;
+
+   assert(face < 6);
+   return GL_TEXTURE_CUBE_MAP_POSITIVE_X + face;
+}
+
+
+/**
+ * Map a cube map face target to its face index, i.e. its offset from
+ * GL_TEXTURE_CUBE_MAP_POSITIVE_X.  All other targets yield 0.
+ */
+static inline GLuint
+_mesa_tex_target_to_face(GLenum target)
+{
+   if (!_mesa_is_cube_face(target))
+      return 0;
+
+   return (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+}
+
+
/** Are any of the dimensions of given texture equal to zero? */
static inline GLboolean
_mesa_is_zero_size_texture(const struct gl_texture_image *texImage)
@@ -131,9 +184,6 @@ extern GLboolean
_mesa_target_can_be_compressed(const struct gl_context *ctx, GLenum target,
GLenum intFormat, GLenum *error);
-extern GLuint
-_mesa_tex_target_to_face(GLenum target);
-
extern GLint
_mesa_get_texture_dimensions(GLenum target);
diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c
index e926c7b6cd2..d8407f04340 100644
--- a/src/mesa/main/texobj.c
+++ b/src/mesa/main/texobj.c
@@ -171,16 +171,16 @@ _mesa_get_current_tex_object(struct gl_context *ctx, GLenum target)
return texUnit->CurrentTex[TEXTURE_3D_INDEX];
case GL_PROXY_TEXTURE_3D:
return ctx->Texture.ProxyTex[TEXTURE_3D_INDEX];
- case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
- case GL_TEXTURE_CUBE_MAP_ARB:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+ case GL_TEXTURE_CUBE_MAP:
return ctx->Extensions.ARB_texture_cube_map
? texUnit->CurrentTex[TEXTURE_CUBE_INDEX] : NULL;
- case GL_PROXY_TEXTURE_CUBE_MAP_ARB:
+ case GL_PROXY_TEXTURE_CUBE_MAP:
return ctx->Extensions.ARB_texture_cube_map
? ctx->Texture.ProxyTex[TEXTURE_CUBE_INDEX] : NULL;
case GL_TEXTURE_CUBE_MAP_ARRAY:
@@ -239,7 +239,7 @@ _mesa_get_current_tex_object(struct gl_context *ctx, GLenum target)
* \param shared the shared GL state structure to contain the texture object
* \param name integer name for the texture object
* \param target either GL_TEXTURE_1D, GL_TEXTURE_2D, GL_TEXTURE_3D,
- * GL_TEXTURE_CUBE_MAP_ARB or GL_TEXTURE_RECTANGLE_NV. zero is ok for the sake
+ * GL_TEXTURE_CUBE_MAP or GL_TEXTURE_RECTANGLE_NV. zero is ok for the sake
* of GenTextures()
*
* \return pointer to new texture object.
@@ -270,7 +270,7 @@ _mesa_initialize_texture_object( struct gl_context *ctx,
target == GL_TEXTURE_1D ||
target == GL_TEXTURE_2D ||
target == GL_TEXTURE_3D ||
- target == GL_TEXTURE_CUBE_MAP_ARB ||
+ target == GL_TEXTURE_CUBE_MAP ||
target == GL_TEXTURE_RECTANGLE_NV ||
target == GL_TEXTURE_1D_ARRAY_EXT ||
target == GL_TEXTURE_2D_ARRAY_EXT ||
@@ -513,7 +513,7 @@ valid_texture_object(const struct gl_texture_object *tex)
case GL_TEXTURE_1D:
case GL_TEXTURE_2D:
case GL_TEXTURE_3D:
- case GL_TEXTURE_CUBE_MAP_ARB:
+ case GL_TEXTURE_CUBE_MAP:
case GL_TEXTURE_RECTANGLE_NV:
case GL_TEXTURE_1D_ARRAY_EXT:
case GL_TEXTURE_2D_ARRAY_EXT:
@@ -725,7 +725,7 @@ _mesa_test_texobj_completeness( const struct gl_context *ctx,
case GL_TEXTURE_3D:
maxLevels = ctx->Const.Max3DTextureLevels;
break;
- case GL_TEXTURE_CUBE_MAP_ARB:
+ case GL_TEXTURE_CUBE_MAP:
case GL_TEXTURE_CUBE_MAP_ARRAY:
maxLevels = ctx->Const.MaxCubeTextureLevels;
break;
@@ -768,7 +768,7 @@ _mesa_test_texobj_completeness( const struct gl_context *ctx,
return;
}
- if (t->Target == GL_TEXTURE_CUBE_MAP_ARB) {
+ if (t->Target == GL_TEXTURE_CUBE_MAP) {
/* Make sure that all six cube map level 0 images are the same size and
* format.
* Note: we know that the image's width==height (we enforce that
@@ -1036,12 +1036,7 @@ _mesa_get_fallback_texture(struct gl_context *ctx, gl_texture_index tex)
/* need a loop here just for cube maps */
for (face = 0; face < numFaces; face++) {
- GLenum faceTarget;
-
- if (target == GL_TEXTURE_CUBE_MAP)
- faceTarget = GL_TEXTURE_CUBE_MAP_POSITIVE_X + face;
- else
- faceTarget = target;
+ const GLenum faceTarget = _mesa_cube_face_target(target, face);
/* initialize level[0] texture image */
texImage = _mesa_get_tex_image(ctx, texObj, faceTarget, 0);
diff --git a/src/mesa/main/texobj.h b/src/mesa/main/texobj.h
index 8421337de4d..378d87a1cbb 100644
--- a/src/mesa/main/texobj.h
+++ b/src/mesa/main/texobj.h
@@ -120,25 +120,6 @@ _mesa_unlock_texture(struct gl_context *ctx, struct gl_texture_object *texObj)
}
-/**
- * Return number of faces for a texture target. This will be 6 for
- * cube maps (and cube map arrays) and 1 otherwise.
- * NOTE: this function is not used for cube map arrays which operate
- * more like 2D arrays than cube maps.
- */
-static inline GLuint
-_mesa_num_tex_faces(GLenum target)
-{
- switch (target) {
- case GL_TEXTURE_CUBE_MAP:
- case GL_PROXY_TEXTURE_CUBE_MAP:
- return 6;
- default:
- return 1;
- }
-}
-
-
/** Is the texture "complete" with respect to the given sampler state? */
static inline GLboolean
_mesa_is_texture_complete(const struct gl_texture_object *texObj,
diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c
index 89f286cc05e..20770a77e15 100644
--- a/src/mesa/main/texparam.c
+++ b/src/mesa/main/texparam.c
@@ -72,7 +72,7 @@ validate_texture_wrap_mode(struct gl_context * ctx, GLenum target, GLenum wrap)
break;
case GL_CLAMP_TO_BORDER:
- supported = is_desktop_gl && e->ARB_texture_border_clamp
+ supported = ctx->API != API_OPENGLES && e->ARB_texture_border_clamp
&& (target != GL_TEXTURE_EXTERNAL_OES);
break;
@@ -500,9 +500,7 @@ set_tex_parameteri(struct gl_context *ctx,
goto invalid_pname;
case GL_DEPTH_STENCIL_TEXTURE_MODE:
- if ((_mesa_is_desktop_gl(ctx) &&
- ctx->Extensions.ARB_stencil_texturing) ||
- _mesa_is_gles31(ctx)) {
+ if (_mesa_has_ARB_stencil_texturing(ctx) || _mesa_is_gles31(ctx)) {
bool stencil = params[0] == GL_STENCIL_INDEX;
if (!stencil && params[0] != GL_DEPTH_COMPONENT)
goto invalid_param;
@@ -719,7 +717,8 @@ set_tex_parameterf(struct gl_context *ctx,
break;
case GL_TEXTURE_BORDER_COLOR:
- if (!_mesa_is_desktop_gl(ctx))
+ if (ctx->API == API_OPENGLES ||
+ !ctx->Extensions.ARB_texture_border_clamp)
goto invalid_pname;
if (!target_allows_setting_sampler_parameters(texObj->Target))
@@ -1215,12 +1214,12 @@ legal_get_tex_level_parameter_target(struct gl_context *ctx, GLenum target,
return GL_TRUE;
case GL_TEXTURE_2D_ARRAY_EXT:
return ctx->Extensions.EXT_texture_array;
- case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
return ctx->Extensions.ARB_texture_cube_map;
case GL_TEXTURE_2D_MULTISAMPLE:
case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
@@ -1237,7 +1236,7 @@ legal_get_tex_level_parameter_target(struct gl_context *ctx, GLenum target,
case GL_PROXY_TEXTURE_2D:
case GL_PROXY_TEXTURE_3D:
return GL_TRUE;
- case GL_PROXY_TEXTURE_CUBE_MAP_ARB:
+ case GL_PROXY_TEXTURE_CUBE_MAP:
return ctx->Extensions.ARB_texture_cube_map;
case GL_TEXTURE_CUBE_MAP_ARRAY_ARB:
case GL_PROXY_TEXTURE_CUBE_MAP_ARRAY_ARB:
@@ -1312,6 +1311,7 @@ get_tex_level_parameter_image(struct gl_context *ctx,
dummy_image.TexFormat = MESA_FORMAT_NONE;
dummy_image.InternalFormat = GL_RGBA;
dummy_image._BaseFormat = GL_NONE;
+ dummy_image.FixedSampleLocations = GL_TRUE;
img = &dummy_image;
}
@@ -1736,7 +1736,8 @@ get_tex_parameterfv(struct gl_context *ctx,
*params = ENUM_TO_FLOAT(obj->Sampler.WrapR);
break;
case GL_TEXTURE_BORDER_COLOR:
- if (!_mesa_is_desktop_gl(ctx))
+ if (ctx->API == API_OPENGLES ||
+ !ctx->Extensions.ARB_texture_border_clamp)
goto invalid_pname;
if (ctx->NewState & (_NEW_BUFFERS | _NEW_FRAG_CLAMP))
@@ -1819,7 +1820,7 @@ get_tex_parameterfv(struct gl_context *ctx,
*params = (GLfloat) obj->DepthMode;
break;
case GL_DEPTH_STENCIL_TEXTURE_MODE:
- if (!_mesa_is_desktop_gl(ctx) || !ctx->Extensions.ARB_stencil_texturing)
+ if (!_mesa_has_ARB_stencil_texturing(ctx) && !_mesa_is_gles31(ctx))
goto invalid_pname;
*params = (GLfloat)
(obj->StencilSampling ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT);
@@ -1970,7 +1971,8 @@ get_tex_parameteriv(struct gl_context *ctx,
*params = (GLint) obj->Sampler.WrapR;
break;
case GL_TEXTURE_BORDER_COLOR:
- if (!_mesa_is_desktop_gl(ctx))
+ if (ctx->API == API_OPENGLES ||
+ !ctx->Extensions.ARB_texture_border_clamp)
goto invalid_pname;
{
@@ -2054,7 +2056,7 @@ get_tex_parameteriv(struct gl_context *ctx,
*params = (GLint) obj->DepthMode;
break;
case GL_DEPTH_STENCIL_TEXTURE_MODE:
- if (!_mesa_is_desktop_gl(ctx) || !ctx->Extensions.ARB_stencil_texturing)
+ if (!_mesa_has_ARB_stencil_texturing(ctx) && !_mesa_is_gles31(ctx))
goto invalid_pname;
*params = (GLint)
(obj->StencilSampling ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT);
diff --git a/src/mesa/main/texstate.c b/src/mesa/main/texstate.c
index 9d88554d945..9ee5c6974df 100644
--- a/src/mesa/main/texstate.c
+++ b/src/mesa/main/texstate.c
@@ -786,7 +786,7 @@ alloc_proxy_textures( struct gl_context *ctx )
GL_TEXTURE_2D_ARRAY_EXT,
GL_TEXTURE_1D_ARRAY_EXT,
GL_TEXTURE_EXTERNAL_OES,
- GL_TEXTURE_CUBE_MAP_ARB,
+ GL_TEXTURE_CUBE_MAP,
GL_TEXTURE_3D,
GL_TEXTURE_RECTANGLE_NV,
GL_TEXTURE_2D,
diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c
index d7671738b18..c33b1095900 100644
--- a/src/mesa/main/texstore.c
+++ b/src/mesa/main/texstore.c
@@ -87,9 +87,6 @@ enum {
* Texture image storage function.
*/
typedef GLboolean (*StoreTexImageFunc)(TEXSTORE_PARAMS);
-static const GLubyte map_identity[6] = { 0, 1, 2, 3, ZERO, ONE };
-static const GLubyte map_3210[6] = { 3, 2, 1, 0, ZERO, ONE };
-static const GLubyte map_1032[6] = { 1, 0, 3, 2, ZERO, ONE };
/**
diff --git a/src/mesa/main/textureview.c b/src/mesa/main/textureview.c
index 04b7d73da5c..316d8280338 100644
--- a/src/mesa/main/textureview.c
+++ b/src/mesa/main/textureview.c
@@ -211,10 +211,7 @@ initialize_texture_fields(struct gl_context *ctx,
for (level = 0; level < levels; level++) {
for (face = 0; face < numFaces; face++) {
struct gl_texture_image *texImage;
- GLenum faceTarget = target;
-
- if (target == GL_TEXTURE_CUBE_MAP)
- faceTarget = GL_TEXTURE_CUBE_MAP_POSITIVE_X + face;
+ const GLenum faceTarget = _mesa_cube_face_target(target, face);
texImage = _mesa_get_tex_image(ctx, texObj, faceTarget, level);
@@ -536,9 +533,7 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture,
newViewNumLevels = MIN2(numlevels, origTexObj->NumLevels - minlevel);
newViewNumLayers = MIN2(numlayers, origTexObj->NumLayers - minlayer);
- faceTarget = origTexObj->Target;
- if (faceTarget == GL_TEXTURE_CUBE_MAP)
- faceTarget = GL_TEXTURE_CUBE_MAP_POSITIVE_X + minlayer;
+ faceTarget = _mesa_cube_face_target(origTexObj->Target, minlayer);
/* Get a reference to what will become this View's base level */
origTexImage = _mesa_select_tex_image(origTexObj, faceTarget, minlevel);
diff --git a/src/mesa/program/prog_statevars.c b/src/mesa/program/prog_statevars.c
index eed241271df..db53377d705 100644
--- a/src/mesa/program/prog_statevars.c
+++ b/src/mesa/program/prog_statevars.c
@@ -40,7 +40,7 @@
#include "prog_statevars.h"
#include "prog_parameter.h"
#include "main/samplerobj.h"
-#include "framebuffer.h"
+#include "main/framebuffer.h"
#define ONE_DIV_SQRT_LN2 (1.201122408786449815)
@@ -353,7 +353,7 @@ _mesa_fetch_state(struct gl_context *ctx, const gl_state_index state[],
}
return;
case STATE_NUM_SAMPLES:
- ((int *)value)[0] = _mesa_geometric_samples(ctx->DrawBuffer);
+ ((int *)value)[0] = MAX2(1, _mesa_geometric_samples(ctx->DrawBuffer));
return;
case STATE_DEPTH_RANGE:
value[0] = ctx->ViewportArray[0].Near; /* near */
diff --git a/src/mesa/state_tracker/st_atom.c b/src/mesa/state_tracker/st_atom.c
index 4b89ade1b15..622621bdcbb 100644
--- a/src/mesa/state_tracker/st_atom.c
+++ b/src/mesa/state_tracker/st_atom.c
@@ -38,9 +38,9 @@
/**
- * This is used to initialize st->atoms[].
+ * This is used to initialize st->render_atoms[].
*/
-static const struct st_tracked_state *atoms[] =
+static const struct st_tracked_state *render_atoms[] =
{
&st_update_depth_stencil_alpha,
&st_update_clip,
@@ -85,6 +85,11 @@ static const struct st_tracked_state *atoms[] =
&st_bind_tes_ssbos,
&st_bind_fs_ssbos,
&st_bind_gs_ssbos,
+ &st_bind_vs_images,
+ &st_bind_tcs_images,
+ &st_bind_tes_images,
+ &st_bind_gs_images,
+ &st_bind_fs_images,
&st_update_pixel_transfer,
&st_update_tess,
@@ -93,6 +98,22 @@ static const struct st_tracked_state *atoms[] =
};
+/**
+ * Atoms that st_validate_state() walks, in order, for ST_PIPELINE_COMPUTE.
+ */
+static const struct st_tracked_state *compute_atoms[] =
+{
+ &st_update_cp,
+ &st_update_compute_texture,
+ &st_update_sampler, /* depends on update_compute_texture for swizzle */
+ &st_update_cs_constants,
+ &st_bind_cs_ubos,
+ &st_bind_cs_atomics,
+ &st_bind_cs_ssbos,
+ &st_bind_cs_images,
+};
+
+
void st_init_atoms( struct st_context *st )
{
/* no-op */
@@ -178,20 +199,41 @@ static void check_attrib_edgeflag(struct st_context *st)
* Update all derived state:
*/
-void st_validate_state( struct st_context *st )
+void st_validate_state( struct st_context *st, enum st_pipeline pipeline )
{
- struct st_state_flags *state = &st->dirty;
+ const struct st_tracked_state **atoms;
+ struct st_state_flags *state;
+ GLuint num_atoms;
GLuint i;
+ /* Get pipeline state. */
+ switch (pipeline) {
+ case ST_PIPELINE_RENDER:
+ atoms = render_atoms;
+ num_atoms = ARRAY_SIZE(render_atoms);
+ state = &st->dirty;
+ break;
+ case ST_PIPELINE_COMPUTE:
+ atoms = compute_atoms;
+ num_atoms = ARRAY_SIZE(compute_atoms);
+ state = &st->dirty_cp;
+ break;
+ default:
+ unreachable("Invalid pipeline specified");
+ }
+
/* Get Mesa driver state. */
st->dirty.st |= st->ctx->NewDriverState;
+ st->dirty_cp.st |= st->ctx->NewDriverState;
st->ctx->NewDriverState = 0;
- check_attrib_edgeflag(st);
+ if (pipeline == ST_PIPELINE_RENDER) {
+ check_attrib_edgeflag(st);
- check_program_state( st );
+ check_program_state(st);
- st_manager_validate_framebuffers(st);
+ st_manager_validate_framebuffers(st);
+ }
if (state->st == 0 && state->mesa == 0)
return;
@@ -211,7 +253,7 @@ void st_validate_state( struct st_context *st )
memset(&examined, 0, sizeof(examined));
prev = *state;
- for (i = 0; i < ARRAY_SIZE(atoms); i++) {
+ for (i = 0; i < num_atoms; i++) {
const struct st_tracked_state *atom = atoms[i];
struct st_state_flags generated;
@@ -242,7 +284,7 @@ void st_validate_state( struct st_context *st )
}
else {
- for (i = 0; i < ARRAY_SIZE(atoms); i++) {
+ for (i = 0; i < num_atoms; i++) {
if (check_state(state, &atoms[i]->dirty))
atoms[i]->update( st );
}
diff --git a/src/mesa/state_tracker/st_atom.h b/src/mesa/state_tracker/st_atom.h
index 3a9153c80cb..31bb2dd98bf 100644
--- a/src/mesa/state_tracker/st_atom.h
+++ b/src/mesa/state_tracker/st_atom.h
@@ -36,6 +36,9 @@
#include "main/glheader.h"
+#include "state_tracker/st_api.h"
+#include "state_tracker/st_context.h"
+
struct st_context;
struct st_tracked_state;
@@ -43,7 +46,7 @@ void st_init_atoms( struct st_context *st );
void st_destroy_atoms( struct st_context *st );
-void st_validate_state( struct st_context *st );
+void st_validate_state( struct st_context *st, enum st_pipeline pipeline );
extern const struct st_tracked_state st_update_array;
@@ -55,6 +58,7 @@ extern const struct st_tracked_state st_update_gp;
extern const struct st_tracked_state st_update_tep;
extern const struct st_tracked_state st_update_tcp;
extern const struct st_tracked_state st_update_vp;
+extern const struct st_tracked_state st_update_cp;
extern const struct st_tracked_state st_update_rasterizer;
extern const struct st_tracked_state st_update_polygon_stipple;
extern const struct st_tracked_state st_update_viewport;
@@ -68,26 +72,37 @@ extern const struct st_tracked_state st_update_vertex_texture;
extern const struct st_tracked_state st_update_geometry_texture;
extern const struct st_tracked_state st_update_tessctrl_texture;
extern const struct st_tracked_state st_update_tesseval_texture;
+extern const struct st_tracked_state st_update_compute_texture;
extern const struct st_tracked_state st_update_fs_constants;
extern const struct st_tracked_state st_update_gs_constants;
extern const struct st_tracked_state st_update_tes_constants;
extern const struct st_tracked_state st_update_tcs_constants;
extern const struct st_tracked_state st_update_vs_constants;
+extern const struct st_tracked_state st_update_cs_constants;
extern const struct st_tracked_state st_bind_fs_ubos;
extern const struct st_tracked_state st_bind_vs_ubos;
extern const struct st_tracked_state st_bind_gs_ubos;
extern const struct st_tracked_state st_bind_tcs_ubos;
extern const struct st_tracked_state st_bind_tes_ubos;
+extern const struct st_tracked_state st_bind_cs_ubos;
extern const struct st_tracked_state st_bind_fs_atomics;
extern const struct st_tracked_state st_bind_vs_atomics;
extern const struct st_tracked_state st_bind_gs_atomics;
extern const struct st_tracked_state st_bind_tcs_atomics;
extern const struct st_tracked_state st_bind_tes_atomics;
+extern const struct st_tracked_state st_bind_cs_atomics;
extern const struct st_tracked_state st_bind_fs_ssbos;
extern const struct st_tracked_state st_bind_vs_ssbos;
extern const struct st_tracked_state st_bind_gs_ssbos;
extern const struct st_tracked_state st_bind_tcs_ssbos;
extern const struct st_tracked_state st_bind_tes_ssbos;
+extern const struct st_tracked_state st_bind_cs_ssbos;
+extern const struct st_tracked_state st_bind_fs_images;
+extern const struct st_tracked_state st_bind_vs_images;
+extern const struct st_tracked_state st_bind_gs_images;
+extern const struct st_tracked_state st_bind_tcs_images;
+extern const struct st_tracked_state st_bind_tes_images;
+extern const struct st_tracked_state st_bind_cs_images;
extern const struct st_tracked_state st_update_pixel_transfer;
extern const struct st_tracked_state st_update_tess;
diff --git a/src/mesa/state_tracker/st_atom_atomicbuf.c b/src/mesa/state_tracker/st_atom_atomicbuf.c
index 1c30d1fb701..a27dbe0460d 100644
--- a/src/mesa/state_tracker/st_atom_atomicbuf.c
+++ b/src/mesa/state_tracker/st_atom_atomicbuf.c
@@ -58,9 +58,11 @@ st_bind_atomics(struct st_context *st,
st_buffer_object(binding->BufferObject);
struct pipe_shader_buffer sb = { 0 };
- sb.buffer = st_obj->buffer;
- sb.buffer_offset = binding->Offset;
- sb.buffer_size = st_obj->buffer->width0 - binding->Offset;
+ if (st_obj && st_obj->buffer) {
+ sb.buffer = st_obj->buffer;
+ sb.buffer_offset = binding->Offset;
+ sb.buffer_size = st_obj->buffer->width0 - binding->Offset;
+ }
st->pipe->set_shader_buffers(st->pipe, shader_type,
atomic->Binding, 1, &sb);
@@ -156,3 +158,21 @@ const struct st_tracked_state st_bind_tes_atomics = {
},
bind_tes_atomics
};
+
+/* Hand the current compute-stage program to st_bind_atomics(). */
+static void
+bind_cs_atomics(struct st_context *st)
+{
+   st_bind_atomics(st,
+                   st->ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE],
+                   PIPE_SHADER_COMPUTE);
+}
+
+const struct st_tracked_state st_bind_cs_atomics = {
+   "st_bind_cs_atomics",                             /* name */
+   {                                                 /* dirty */
+      0,                                             /* mesa */
+      ST_NEW_COMPUTE_PROGRAM | ST_NEW_ATOMIC_BUFFER, /* st */
+   },
+   bind_cs_atomics                                   /* update */
+};
diff --git a/src/mesa/state_tracker/st_atom_constbuf.c b/src/mesa/state_tracker/st_atom_constbuf.c
index 66811d29c29..407dfd31c80 100644
--- a/src/mesa/state_tracker/st_atom_constbuf.c
+++ b/src/mesa/state_tracker/st_atom_constbuf.c
@@ -61,7 +61,8 @@ void st_upload_constants( struct st_context *st,
shader_type == PIPE_SHADER_FRAGMENT ||
shader_type == PIPE_SHADER_GEOMETRY ||
shader_type == PIPE_SHADER_TESS_CTRL ||
- shader_type == PIPE_SHADER_TESS_EVAL);
+ shader_type == PIPE_SHADER_TESS_EVAL ||
+ shader_type == PIPE_SHADER_COMPUTE);
/* update constants */
if (params && params->NumParameters) {
@@ -226,6 +227,28 @@ const struct st_tracked_state st_update_tes_constants = {
update_tes_constants /* update */
};
+/* Compute shader: pass the parameter list of the bound compute program
+ * (st->cp) to st_upload_constants(); nothing happens when st->cp is NULL.
+ */
+static void update_cs_constants(struct st_context *st )
+{
+   struct st_compute_program *prog = st->cp;
+
+   if (!prog)
+      return;
+
+   st_upload_constants(st, prog->Base.Base.Parameters, PIPE_SHADER_COMPUTE);
+}
+
+const struct st_tracked_state st_update_cs_constants = {
+   "st_update_cs_constants",   /* name */
+   {                           /* dirty */
+      _NEW_PROGRAM_CONSTANTS,  /* mesa */
+      ST_NEW_COMPUTE_PROGRAM,  /* st */
+   },
+   update_cs_constants         /* update */
+};
+
static void st_bind_ubos(struct st_context *st,
struct gl_shader *shader,
unsigned shader_type)
@@ -363,3 +386,24 @@ const struct st_tracked_state st_bind_tes_ubos = {
},
bind_tes_ubos
};
+
+/* Run st_bind_ubos() on the compute stage of the current program, if any. */
+static void bind_cs_ubos(struct st_context *st)
+{
+   struct gl_shader_program *cs_prog =
+      st->ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
+
+   if (cs_prog) {
+      st_bind_ubos(st, cs_prog->_LinkedShaders[MESA_SHADER_COMPUTE],
+                   PIPE_SHADER_COMPUTE);
+   }
+}
+
+const struct st_tracked_state st_bind_cs_ubos = {
+   "st_bind_cs_ubos",                                 /* name */
+   {                                                  /* dirty */
+      0,                                              /* mesa */
+      ST_NEW_COMPUTE_PROGRAM | ST_NEW_UNIFORM_BUFFER, /* st */
+   },
+   bind_cs_ubos                                       /* update */
+};
diff --git a/src/mesa/state_tracker/st_atom_image.c b/src/mesa/state_tracker/st_atom_image.c
new file mode 100644
index 00000000000..4b48bc30b69
--- /dev/null
+++ b/src/mesa/state_tracker/st_atom_image.c
@@ -0,0 +1,236 @@
+/**************************************************************************
+ *
+ * Copyright 2016 Ilia Mirkin. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "main/imports.h"
+#include "program/prog_parameter.h"
+#include "program/prog_print.h"
+#include "compiler/glsl/ir_uniform.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "util/u_surface.h"
+
+#include "st_cb_texture.h"
+#include "st_debug.h"
+#include "st_texture.h"
+#include "st_context.h"
+#include "st_atom.h"
+#include "st_program.h"
+#include "st_format.h"
+
+/**
+ * Bind the image units used by \p shader and unbind any leftover slots.
+ * No-op when \p shader is NULL or the driver lacks set_shader_images.
+ */
+static void
+st_bind_images(struct st_context *st, struct gl_shader *shader,
+               unsigned shader_type)
+{
+   unsigned i;
+   struct pipe_image_view images[MAX_IMAGE_UNIFORMS];
+   struct gl_program_constants *c;
+
+   if (!shader || !st->pipe->set_shader_images)
+      return;
+
+   c = &st->ctx->Const.Program[shader->Stage]; /* shader checked above */
+
+   for (i = 0; i < shader->NumImages; i++) {
+      struct gl_image_unit *u = &st->ctx->ImageUnits[shader->ImageUnits[i]];
+      struct st_texture_object *stObj = st_texture_object(u->TexObj);
+      struct pipe_image_view *img = &images[i];
+
+      if (!stObj || !st_finalize_texture(st->ctx, st->pipe, u->TexObj) ||
+          !stObj->pt) {
+         memset(img, 0, sizeof(*img));
+         continue;
+      }
+
+      img->resource = stObj->pt;
+      img->format = st_mesa_format_to_pipe_format(st, u->_ActualFormat);
+      if (stObj->pt->target == PIPE_BUFFER) {
+         unsigned base, size, f, n;
+         const struct util_format_description *desc
+            = util_format_description(img->format);
+         base = stObj->base.BufferOffset;
+         assert(base < stObj->pt->width0);
+         size = MIN2(stObj->pt->width0 - base, (unsigned)stObj->base.BufferSize);
+         /* Turn the byte offset/size into element counts of the view format. */
+         f = (base / (desc->block.bits / 8)) * desc->block.width;
+         n = (size / (desc->block.bits / 8)) * desc->block.width;
+         assert(n > 0);
+         img->u.buf.first_element = f;
+         img->u.buf.last_element = f + (n - 1);
+      } else {
+         img->u.tex.level = u->Level + stObj->base.MinLevel;
+         if (stObj->pt->target == PIPE_TEXTURE_3D) {
+            if (u->Layered) {
+               img->u.tex.first_layer = 0;
+               img->u.tex.last_layer = u_minify(stObj->pt->depth0, img->u.tex.level) - 1;
+            } else {
+               img->u.tex.first_layer = u->_Layer;
+               img->u.tex.last_layer = u->_Layer;
+            }
+         } else {
+            img->u.tex.first_layer = u->_Layer + stObj->base.MinLayer;
+            img->u.tex.last_layer = u->_Layer + stObj->base.MinLayer;
+            if (u->Layered && img->resource->array_size > 1) {
+               if (stObj->base.Immutable)
+                  img->u.tex.last_layer += stObj->base.NumLayers - 1;
+               else
+                  img->u.tex.last_layer += img->resource->array_size - 1;
+            }
+         }
+      }
+   }
+   st->pipe->set_shader_images(st->pipe, shader_type, 0, shader->NumImages,
+                               images);
+   /* clear out any stale shader images */
+   if (shader->NumImages < c->MaxImageUniforms)
+      st->pipe->set_shader_images(st->pipe, shader_type, shader->NumImages,
+                                  c->MaxImageUniforms - shader->NumImages, NULL);
+}
+
+/* Run st_bind_images() on the vertex stage of the current program, if any. */
+static void bind_vs_images(struct st_context *st)
+{
+   struct gl_shader_program *vs_prog =
+      st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX];
+   if (vs_prog) {
+      st_bind_images(st, vs_prog->_LinkedShaders[MESA_SHADER_VERTEX],
+                     PIPE_SHADER_VERTEX);
+   }
+}
+
+const struct st_tracked_state st_bind_vs_images = {
+   "st_bind_vs_images",                           /* name */
+   {                                              /* dirty */
+      0,                                          /* mesa */
+      ST_NEW_VERTEX_PROGRAM | ST_NEW_IMAGE_UNITS, /* st */
+   },
+   bind_vs_images                                 /* update */
+};
+
+/* Run st_bind_images() on the fragment stage of the current program, if any. */
+static void bind_fs_images(struct st_context *st)
+{
+   struct gl_shader_program *fs_prog =
+      st->ctx->_Shader->CurrentProgram[MESA_SHADER_FRAGMENT];
+   if (fs_prog) {
+      st_bind_images(st, fs_prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
+                     PIPE_SHADER_FRAGMENT);
+   }
+}
+
+const struct st_tracked_state st_bind_fs_images = {
+   "st_bind_fs_images",                             /* name */
+   {                                                /* dirty */
+      0,                                            /* mesa */
+      ST_NEW_FRAGMENT_PROGRAM | ST_NEW_IMAGE_UNITS, /* st */
+   },
+   bind_fs_images                                   /* update */
+};
+
+/* Run st_bind_images() on the geometry stage of the current program, if any. */
+static void bind_gs_images(struct st_context *st)
+{
+   struct gl_shader_program *gs_prog =
+      st->ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
+   if (gs_prog) {
+      st_bind_images(st, gs_prog->_LinkedShaders[MESA_SHADER_GEOMETRY],
+                     PIPE_SHADER_GEOMETRY);
+   }
+}
+
+const struct st_tracked_state st_bind_gs_images = {
+   "st_bind_gs_images",                             /* name */
+   {                                                /* dirty */
+      0,                                            /* mesa */
+      ST_NEW_GEOMETRY_PROGRAM | ST_NEW_IMAGE_UNITS, /* st */
+   },
+   bind_gs_images                                   /* update */
+};
+
+/* Run st_bind_images() on the tess-control stage of the current program. */
+static void bind_tcs_images(struct st_context *st)
+{
+   struct gl_shader_program *tcs_prog =
+      st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL];
+   if (tcs_prog) {
+      st_bind_images(st, tcs_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL],
+                     PIPE_SHADER_TESS_CTRL);
+   }
+}
+
+const struct st_tracked_state st_bind_tcs_images = {
+   "st_bind_tcs_images",                            /* name */
+   {                                                /* dirty */
+      0,                                            /* mesa */
+      ST_NEW_TESSCTRL_PROGRAM | ST_NEW_IMAGE_UNITS, /* st */
+   },
+   bind_tcs_images                                  /* update */
+};
+
+/* Run st_bind_images() on the tess-eval stage of the current program. */
+static void bind_tes_images(struct st_context *st)
+{
+   struct gl_shader_program *tes_prog =
+      st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL];
+   if (tes_prog) {
+      st_bind_images(st, tes_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL],
+                     PIPE_SHADER_TESS_EVAL);
+   }
+}
+
+const struct st_tracked_state st_bind_tes_images = {
+   "st_bind_tes_images",                            /* name */
+   {                                                /* dirty */
+      0,                                            /* mesa */
+      ST_NEW_TESSEVAL_PROGRAM | ST_NEW_IMAGE_UNITS, /* st */
+   },
+   bind_tes_images                                  /* update */
+};
+
+/* Run st_bind_images() on the compute stage of the current program, if any. */
+static void bind_cs_images(struct st_context *st)
+{
+   struct gl_shader_program *cs_prog =
+      st->ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
+   if (cs_prog) {
+      st_bind_images(st, cs_prog->_LinkedShaders[MESA_SHADER_COMPUTE],
+                     PIPE_SHADER_COMPUTE);
+   }
+}
+
+const struct st_tracked_state st_bind_cs_images = {
+   "st_bind_cs_images",                            /* name */
+   {                                               /* dirty */
+      0,                                           /* mesa */
+      ST_NEW_COMPUTE_PROGRAM | ST_NEW_IMAGE_UNITS, /* st */
+   },
+   bind_cs_images                                  /* update */
+};
diff --git a/src/mesa/state_tracker/st_atom_sampler.c b/src/mesa/state_tracker/st_atom_sampler.c
index 94231cf1946..82dcf5ee0ca 100644
--- a/src/mesa/state_tracker/st_atom_sampler.c
+++ b/src/mesa/state_tracker/st_atom_sampler.c
@@ -321,6 +321,14 @@ update_samplers(struct st_context *st)
st->state.samplers[PIPE_SHADER_TESS_EVAL],
&st->state.num_samplers[PIPE_SHADER_TESS_EVAL]);
}
+ if (ctx->ComputeProgram._Current) {
+ update_shader_samplers(st,
+ PIPE_SHADER_COMPUTE,
+ &ctx->ComputeProgram._Current->Base,
+ ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits,
+ st->state.samplers[PIPE_SHADER_COMPUTE],
+ &st->state.num_samplers[PIPE_SHADER_COMPUTE]);
+ }
}
diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c
index c8650a5899a..a88f0352746 100644
--- a/src/mesa/state_tracker/st_atom_shader.c
+++ b/src/mesa/state_tracker/st_atom_shader.c
@@ -255,3 +255,35 @@ const struct st_tracked_state st_update_tep = {
},
update_tep /* update */
};
+
+
+
+/* Pick the variant of the current compute program, keep a reference to the
+ * program in st->cp and set the variant's driver shader as the compute
+ * shader handle; with no current compute program, set a NULL handle. */
+static void
+update_cp( struct st_context *st )
+{
+   struct st_compute_program *stcp;
+
+   if (st->ctx->ComputeProgram._Current) {
+      stcp = st_compute_program(st->ctx->ComputeProgram._Current);
+      assert(stcp->Base.Base.Target == GL_COMPUTE_PROGRAM_NV);
+
+      st->cp_variant = st_get_cp_variant(st, &stcp->tgsi, &stcp->variants);
+      st_reference_compprog(st, &st->cp, stcp);
+      cso_set_compute_shader_handle(st->cso_context,
+                                    st->cp_variant->driver_shader);
+   } else {
+      cso_set_compute_shader_handle(st->cso_context, NULL);
+   }
+}
+
+const struct st_tracked_state st_update_cp = {
+   "st_update_cp",           /* name */
+   {                         /* dirty */
+      0,                     /* mesa */
+      ST_NEW_COMPUTE_PROGRAM /* st */
+   },
+   update_cp                 /* update */
+};
diff --git a/src/mesa/state_tracker/st_atom_storagebuf.c b/src/mesa/state_tracker/st_atom_storagebuf.c
index f165cc3e0a1..37b4c4d256d 100644
--- a/src/mesa/state_tracker/st_atom_storagebuf.c
+++ b/src/mesa/state_tracker/st_atom_storagebuf.c
@@ -194,3 +194,24 @@ const struct st_tracked_state st_bind_tes_ssbos = {
},
bind_tes_ssbos
};
+
+/**
+ * Bind the shader storage buffers of the current compute shader program.
+ * Does nothing when no compute program is current.
+ */
+static void bind_cs_ssbos(struct st_context *st)
+{
+   struct gl_shader_program *cs_prog =
+      st->ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
+
+   if (cs_prog) {
+      st_bind_ssbos(st, cs_prog->_LinkedShaders[MESA_SHADER_COMPUTE],
+                    PIPE_SHADER_COMPUTE);
+   }
+}
+
+/* State atom pairing bind_cs_ssbos() with its dirty bits. */
+const struct st_tracked_state st_bind_cs_ssbos = {
+ "st_bind_cs_ssbos", /* name */
+ { /* dirty */
+ 0, /* mesa */
+ ST_NEW_COMPUTE_PROGRAM | ST_NEW_STORAGE_BUFFER, /* st */
+ },
+ bind_cs_ssbos /* update */
+};
diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c
index 3e3775200a8..4b7ad77b47a 100644
--- a/src/mesa/state_tracker/st_atom_texture.c
+++ b/src/mesa/state_tracker/st_atom_texture.c
@@ -32,6 +32,7 @@
*/
+#include "main/context.h"
#include "main/macros.h"
#include "main/mtypes.h"
#include "main/samplerobj.h"
@@ -147,9 +148,7 @@ compute_texture_format_swizzle(GLenum baseFormat, GLenum depthMode,
else
return SWIZZLE_XYZW;
case GL_STENCIL_INDEX:
- return SWIZZLE_XYZW;
case GL_DEPTH_STENCIL:
- /* fall-through */
case GL_DEPTH_COMPONENT:
/* Now examine the depth mode */
switch (depthMode) {
@@ -193,15 +192,29 @@ compute_texture_format_swizzle(GLenum baseFormat, GLenum depthMode,
static unsigned
-get_texture_format_swizzle(const struct st_texture_object *stObj,
+get_texture_format_swizzle(const struct st_context *st,
+ const struct st_texture_object *stObj,
unsigned glsl_version)
{
GLenum baseFormat = _mesa_texture_base_format(&stObj->base);
unsigned tex_swizzle;
if (baseFormat != GL_NONE) {
+ GLenum depth_mode = stObj->base.DepthMode;
+ /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
+ * with depth component data specified with a sized internal format.
+ */
+ if (_mesa_is_gles3(st->ctx) &&
+ util_format_is_depth_or_stencil(stObj->pt->format)) {
+ const struct st_texture_image *firstImage =
+ st_texture_image_const(_mesa_base_tex_image(&stObj->base));
+ if (firstImage->base.InternalFormat != GL_DEPTH_COMPONENT &&
+ firstImage->base.InternalFormat != GL_DEPTH_STENCIL &&
+ firstImage->base.InternalFormat != GL_STENCIL_INDEX)
+ depth_mode = GL_RED;
+ }
tex_swizzle = compute_texture_format_swizzle(baseFormat,
- stObj->base.DepthMode,
+ depth_mode,
stObj->pt->format,
glsl_version);
}
@@ -221,10 +234,11 @@ get_texture_format_swizzle(const struct st_texture_object *stObj,
* \param stObj the st texture object,
*/
static boolean
-check_sampler_swizzle(const struct st_texture_object *stObj,
+check_sampler_swizzle(const struct st_context *st,
+ const struct st_texture_object *stObj,
struct pipe_sampler_view *sv, unsigned glsl_version)
{
- unsigned swizzle = get_texture_format_swizzle(stObj, glsl_version);
+ unsigned swizzle = get_texture_format_swizzle(st, stObj, glsl_version);
return ((sv->swizzle_r != GET_SWZ(swizzle, 0)) ||
(sv->swizzle_g != GET_SWZ(swizzle, 1)) ||
@@ -251,13 +265,13 @@ static unsigned last_layer(struct st_texture_object *stObj)
}
static struct pipe_sampler_view *
-st_create_texture_sampler_view_from_stobj(struct pipe_context *pipe,
+st_create_texture_sampler_view_from_stobj(struct st_context *st,
struct st_texture_object *stObj,
enum pipe_format format,
unsigned glsl_version)
{
struct pipe_sampler_view templ;
- unsigned swizzle = get_texture_format_swizzle(stObj, glsl_version);
+ unsigned swizzle = get_texture_format_swizzle(st, stObj, glsl_version);
u_sampler_view_default_template(&templ,
stObj->pt,
@@ -297,7 +311,7 @@ st_create_texture_sampler_view_from_stobj(struct pipe_context *pipe,
templ.swizzle_a = GET_SWZ(swizzle, 3);
}
- return pipe->create_sampler_view(pipe, stObj->pt, &templ);
+ return st->pipe->create_sampler_view(st->pipe, stObj->pt, &templ);
}
@@ -327,7 +341,7 @@ st_get_texture_sampler_view_from_stobj(struct st_context *st,
/* if sampler view has changed dereference it */
if (*sv) {
- if (check_sampler_swizzle(stObj, *sv, glsl_version) ||
+ if (check_sampler_swizzle(st, stObj, *sv, glsl_version) ||
(format != (*sv)->format) ||
gl_target_to_pipe(stObj->base.Target) != (*sv)->target ||
stObj->base.MinLevel + stObj->base.BaseLevel != (*sv)->u.tex.first_level ||
@@ -339,7 +353,7 @@ st_get_texture_sampler_view_from_stobj(struct st_context *st,
}
if (!*sv) {
- *sv = st_create_texture_sampler_view_from_stobj(st->pipe, stObj,
+ *sv = st_create_texture_sampler_view_from_stobj(st, stObj,
format, glsl_version);
} else if ((*sv)->context != st->pipe) {
@@ -534,6 +548,22 @@ update_tesseval_textures(struct st_context *st)
}
+/**
+ * Update the sampler views used by the current compute program.
+ * Does nothing when no compute program is current.
+ */
+static void
+update_compute_textures(struct st_context *st)
+{
+   const struct gl_context *ctx = st->ctx;
+
+   if (!ctx->ComputeProgram._Current)
+      return;
+
+   update_textures(st,
+                   MESA_SHADER_COMPUTE,
+                   &ctx->ComputeProgram._Current->Base,
+                   ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits,
+                   st->state.sampler_views[PIPE_SHADER_COMPUTE],
+                   &st->state.num_sampler_views[PIPE_SHADER_COMPUTE]);
+}
+
+
const struct st_tracked_state st_update_fragment_texture = {
"st_update_texture", /* name */
{ /* dirty */
@@ -582,3 +612,13 @@ const struct st_tracked_state st_update_tesseval_texture = {
},
update_tesseval_textures /* update */
};
+
+
+/* State atom pairing update_compute_textures() with its dirty bits. */
+const struct st_tracked_state st_update_compute_texture = {
+ "st_update_compute_texture", /* name */
+ { /* dirty */
+ _NEW_TEXTURE, /* mesa */
+ ST_NEW_COMPUTE_PROGRAM | ST_NEW_SAMPLER_VIEWS, /* st */
+ },
+ update_compute_textures /* update */
+};
diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c
index 627b8cbd598..4fd2dfef8cc 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/src/mesa/state_tracker/st_cb_bitmap.c
@@ -33,6 +33,7 @@
#include "main/imports.h"
#include "main/image.h"
#include "main/bufferobj.h"
+#include "main/dlist.h"
#include "main/macros.h"
#include "main/pbo.h"
#include "program/program.h"
@@ -41,6 +42,7 @@
#include "st_context.h"
#include "st_atom.h"
#include "st_atom_constbuf.h"
+#include "st_draw.h"
#include "st_program.h"
#include "st_cb_bitmap.h"
#include "st_texture.h"
@@ -49,7 +51,6 @@
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_inlines.h"
-#include "util/u_draw_quad.h"
#include "util/u_simple_shaders.h"
#include "util/u_upload_mgr.h"
#include "program/prog_instruction.h"
@@ -183,7 +184,8 @@ make_bitmap_texture(struct gl_context *ctx, GLsizei width, GLsizei height,
static void
setup_render_state(struct gl_context *ctx,
struct pipe_sampler_view *sv,
- const GLfloat *color)
+ const GLfloat *color,
+ bool atlas)
{
struct st_context *st = st_context(ctx);
struct cso_context *cso = st->cso_context;
@@ -194,7 +196,7 @@ setup_render_state(struct gl_context *ctx,
key.st = st->has_shareable_shaders ? NULL : st;
key.bitmap = GL_TRUE;
key.clamp_color = st->clamp_frag_color_in_shader &&
- st->ctx->Color._ClampFragmentColor;
+ ctx->Color._ClampFragmentColor;
fpv = st_get_fp_variant(st, st->fp, &key);
@@ -214,18 +216,15 @@ setup_render_state(struct gl_context *ctx,
COPY_4V(ctx->Current.Attrib[VERT_ATTRIB_COLOR0], colorSave);
}
- cso_save_rasterizer(cso);
- cso_save_fragment_samplers(cso);
- cso_save_fragment_sampler_views(cso);
- cso_save_viewport(cso);
- cso_save_fragment_shader(cso);
- cso_save_stream_outputs(cso);
- cso_save_vertex_shader(cso);
- cso_save_tessctrl_shader(cso);
- cso_save_tesseval_shader(cso);
- cso_save_geometry_shader(cso);
- cso_save_vertex_elements(cso);
- cso_save_aux_vertex_buffer_slot(cso);
+ cso_save_state(cso, (CSO_BIT_RASTERIZER |
+ CSO_BIT_FRAGMENT_SAMPLERS |
+ CSO_BIT_FRAGMENT_SAMPLER_VIEWS |
+ CSO_BIT_VIEWPORT |
+ CSO_BIT_STREAM_OUTPUTS |
+ CSO_BIT_VERTEX_ELEMENTS |
+ CSO_BIT_AUX_VERTEX_BUFFER_SLOT |
+ CSO_BITS_ALL_SHADERS));
+
/* rasterizer state: just scissor */
st->bitmap.rasterizer.scissor = ctx->Scissor.EnableFlags & 1;
@@ -251,7 +250,10 @@ setup_render_state(struct gl_context *ctx,
for (i = 0; i < st->state.num_samplers[PIPE_SHADER_FRAGMENT]; i++) {
samplers[i] = &st->state.samplers[PIPE_SHADER_FRAGMENT][i];
}
- samplers[fpv->bitmap_sampler] = &st->bitmap.sampler;
+ if (atlas)
+ samplers[fpv->bitmap_sampler] = &st->bitmap.atlas_sampler;
+ else
+ samplers[fpv->bitmap_sampler] = &st->bitmap.sampler;
cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, num,
(const struct pipe_sampler_state **) samplers);
}
@@ -268,21 +270,12 @@ setup_render_state(struct gl_context *ctx,
}
/* viewport state: viewport matching window dims */
- {
- const GLboolean invert = st->state.fb_orientation == Y_0_TOP;
- const GLfloat width = (GLfloat)st->state.framebuffer.width;
- const GLfloat height = (GLfloat)st->state.framebuffer.height;
- struct pipe_viewport_state vp;
- vp.scale[0] = 0.5f * width;
- vp.scale[1] = height * (invert ? -0.5f : 0.5f);
- vp.scale[2] = 0.5f;
- vp.translate[0] = 0.5f * width;
- vp.translate[1] = 0.5f * height;
- vp.translate[2] = 0.5f;
- cso_set_viewport(cso, &vp);
- }
+ cso_set_viewport_dims(cso, st->state.framebuffer.width,
+ st->state.framebuffer.height,
+ st->state.fb_orientation == Y_0_TOP);
+
+ cso_set_vertex_elements(cso, 3, st->util_velems);
- cso_set_vertex_elements(cso, 3, st->velems_util_draw);
cso_set_stream_outputs(st->cso_context, 0, NULL, NULL);
}
@@ -296,18 +289,7 @@ restore_render_state(struct gl_context *ctx)
struct st_context *st = st_context(ctx);
struct cso_context *cso = st->cso_context;
- cso_restore_rasterizer(cso);
- cso_restore_fragment_samplers(cso);
- cso_restore_fragment_sampler_views(cso);
- cso_restore_viewport(cso);
- cso_restore_fragment_shader(cso);
- cso_restore_vertex_shader(cso);
- cso_restore_tessctrl_shader(cso);
- cso_restore_tesseval_shader(cso);
- cso_restore_geometry_shader(cso);
- cso_restore_vertex_elements(cso);
- cso_restore_aux_vertex_buffer_slot(cso);
- cso_restore_stream_outputs(cso);
+ cso_restore_state(cso);
}
@@ -322,7 +304,6 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
{
struct st_context *st = st_context(ctx);
struct pipe_context *pipe = st->pipe;
- struct pipe_resource *vbuf = NULL;
const float fb_width = (float) st->state.framebuffer.width;
const float fb_height = (float) st->state.framebuffer.height;
const float x0 = (float) x;
@@ -335,8 +316,6 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
const float clip_y0 = y0 / fb_height * 2.0f - 1.0f;
const float clip_x1 = x1 / fb_width * 2.0f - 1.0f;
const float clip_y1 = y1 / fb_height * 2.0f - 1.0f;
- float (*vertices)[3][4]; /**< vertex pos + color + texcoord */
- unsigned offset, i;
/* limit checks */
{
@@ -349,7 +328,7 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
assert(height <= (GLsizei) maxSize);
}
- setup_render_state(ctx, sv, color);
+ setup_render_state(ctx, sv, color, false);
/* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */
z = z * 2.0f - 1.0f;
@@ -360,62 +339,13 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
tBot = (float) height;
}
- u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]), 4,
- &offset, &vbuf, (void **) &vertices);
- if (!vbuf) {
+ if (!st_draw_quad(st, clip_x0, clip_y0, clip_x1, clip_y1, z,
+ sLeft, tBot, sRight, tTop, color, 0)) {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glBitmap");
- restore_render_state(ctx);
- return;
}
- /* Positions are in clip coords since we need to do clipping in case
- * the bitmap quad goes beyond the window bounds.
- */
- vertices[0][0][0] = clip_x0;
- vertices[0][0][1] = clip_y0;
- vertices[0][2][0] = sLeft;
- vertices[0][2][1] = tTop;
-
- vertices[1][0][0] = clip_x1;
- vertices[1][0][1] = clip_y0;
- vertices[1][2][0] = sRight;
- vertices[1][2][1] = tTop;
-
- vertices[2][0][0] = clip_x1;
- vertices[2][0][1] = clip_y1;
- vertices[2][2][0] = sRight;
- vertices[2][2][1] = tBot;
-
- vertices[3][0][0] = clip_x0;
- vertices[3][0][1] = clip_y1;
- vertices[3][2][0] = sLeft;
- vertices[3][2][1] = tBot;
-
- /* same for all verts: */
- for (i = 0; i < 4; i++) {
- vertices[i][0][2] = z;
- vertices[i][0][3] = 1.0f;
- vertices[i][1][0] = color[0];
- vertices[i][1][1] = color[1];
- vertices[i][1][2] = color[2];
- vertices[i][1][3] = color[3];
- vertices[i][2][2] = 0.0; /*R*/
- vertices[i][2][3] = 1.0; /*Q*/
- }
-
- u_upload_unmap(st->uploader);
-
- util_draw_vertex_buffer(pipe, st->cso_context, vbuf,
- cso_get_aux_vertex_buffer_slot(st->cso_context),
- offset,
- PIPE_PRIM_TRIANGLE_FAN,
- 4, /* verts */
- 3); /* attribs/vert */
-
restore_render_state(ctx);
- pipe_resource_reference(&vbuf, NULL);
-
/* We uploaded modified constants, need to invalidate them. */
st->dirty.mesa |= _NEW_PROGRAM_CONSTANTS;
}
@@ -555,7 +485,7 @@ accum_bitmap(struct gl_context *ctx,
struct st_context *st = ctx->st;
struct bitmap_cache *cache = st->bitmap.cache;
int px = -999, py = -999;
- const GLfloat z = st->ctx->Current.RasterPos[2];
+ const GLfloat z = ctx->Current.RasterPos[2];
if (width > BITMAP_CACHE_WIDTH ||
height > BITMAP_CACHE_HEIGHT)
@@ -566,7 +496,7 @@ accum_bitmap(struct gl_context *ctx,
py = y - cache->ypos;
if (px < 0 || px + width > BITMAP_CACHE_WIDTH ||
py < 0 || py + height > BITMAP_CACHE_HEIGHT ||
- !TEST_EQ_4V(st->ctx->Current.RasterColor, cache->color) ||
+ !TEST_EQ_4V(ctx->Current.RasterColor, cache->color) ||
((fabs(z - cache->zpos) > Z_EPSILON))) {
/* This bitmap would extend beyond cache bounds, or the bitmap
* color is changing
@@ -584,7 +514,7 @@ accum_bitmap(struct gl_context *ctx,
cache->ypos = y - py;
cache->zpos = z;
cache->empty = GL_FALSE;
- COPY_4FV(cache->color, st->ctx->Current.RasterColor);
+ COPY_4FV(cache->color, ctx->Current.RasterColor);
}
assert(px != -999);
@@ -645,6 +575,9 @@ init_bitmap_state(struct st_context *st)
st->bitmap.sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
st->bitmap.sampler.normalized_coords = st->internal_target == PIPE_TEXTURE_2D;
+ st->bitmap.atlas_sampler = st->bitmap.sampler;
+ st->bitmap.atlas_sampler.normalized_coords = 0;
+
/* init baseline rasterizer state once */
memset(&st->bitmap.rasterizer, 0, sizeof(st->bitmap.rasterizer));
st->bitmap.rasterizer.half_pixel_center = 1;
@@ -713,7 +646,7 @@ st_Bitmap(struct gl_context *ctx, GLint x, GLint y,
* explicitly uploaded in the draw_bitmap_quad() function.
*/
if ((st->dirty.mesa & ~_NEW_PROGRAM_CONSTANTS) || st->dirty.st) {
- st_validate_state(st);
+ st_validate_state(st, ST_PIPELINE_RENDER);
}
if (UseBitmapCache && accum_bitmap(ctx, x, y, width, height, unpack, bitmap))
@@ -728,8 +661,7 @@ st_Bitmap(struct gl_context *ctx, GLint x, GLint y,
if (sv) {
draw_bitmap_quad(ctx, x, y, ctx->Current.RasterPos[2],
- width, height, sv,
- st->ctx->Current.RasterColor);
+ width, height, sv, ctx->Current.RasterColor);
pipe_sampler_view_reference(&sv, NULL);
}
@@ -740,11 +672,139 @@ st_Bitmap(struct gl_context *ctx, GLint x, GLint y,
}
+/**
+ * Fill in one vertex of a glyph quad: clip-space position, color and
+ * texture coordinate.
+ */
+static void
+set_atlas_vertex(struct st_util_vertex *v,
+                 float x, float y, float z,
+                 const float *color, float s, float t)
+{
+   v->x = x;
+   v->y = y;
+   v->z = z;
+   v->r = color[0];
+   v->g = color[1];
+   v->b = color[2];
+   v->a = color[3];
+   v->s = s;
+   v->t = t;
+}
+
+
+/**
+ * Called via ctx->Driver.DrawAtlasBitmaps()
+ *
+ * Builds one textured quad per glyph and draws them all with a single
+ * draw call.  The current raster position is advanced by each glyph's
+ * xmove/ymove as the quads are built.
+ *
+ * \param atlas  the bitmap atlas (texture object plus per-glyph data)
+ * \param count  number of entries in \p ids
+ * \param ids    indices into atlas->glyphs[]
+ */
+static void
+st_DrawAtlasBitmaps(struct gl_context *ctx,
+                    const struct gl_bitmap_atlas *atlas,
+                    GLuint count, const GLubyte *ids)
+{
+   struct st_context *st = st_context(ctx);
+   struct pipe_context *pipe = st->pipe;
+   struct st_texture_object *stObj = st_texture_object(atlas->texObj);
+   struct pipe_sampler_view *sv;
+   /* convert Z from [0,1] to [-1,1] to match viewport Z scale/bias */
+   const float z = ctx->Current.RasterPos[2] * 2.0f - 1.0f;
+   const float *color = ctx->Current.RasterColor;
+   const float clip_x_scale = 2.0f / st->state.framebuffer.width;
+   const float clip_y_scale = 2.0f / st->state.framebuffer.height;
+   const unsigned num_verts = count * 4;
+   const unsigned num_vert_bytes = num_verts * sizeof(struct st_util_vertex);
+   struct st_util_vertex *verts;
+   struct pipe_vertex_buffer vb = {0};
+   unsigned i;
+
+   if (!st->bitmap.cache) {
+      init_bitmap_state(st);
+   }
+
+   st_flush_bitmap_cache(st);
+
+   st_validate_state(st, ST_PIPELINE_RENDER);
+
+   sv = st_create_texture_sampler_view(pipe, stObj->pt);
+   if (!sv) {
+      /* No render state has been saved yet, so there is nothing to undo. */
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCallLists(bitmap text)");
+      return;
+   }
+
+   setup_render_state(ctx, sv, color, true);
+
+   vb.stride = sizeof(struct st_util_vertex);
+
+   u_upload_alloc(st->uploader, 0, num_vert_bytes, 4,
+                  &vb.buffer_offset, &vb.buffer, (void **) &verts);
+   if (!vb.buffer) {
+      /* Allocation failed: 'verts' must not be written through. */
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCallLists(bitmap text)");
+      goto out;
+   }
+
+   /* build quads vertex data */
+   for (i = 0; i < count; i++) {
+      const GLfloat epsilon = 0.0001F;
+      const struct gl_bitmap_glyph *g = &atlas->glyphs[ids[i]];
+      const float xmove = g->xmove, ymove = g->ymove;
+      const float xorig = g->xorig, yorig = g->yorig;
+      const float s0 = g->x, t0 = g->y;
+      const float s1 = s0 + g->w, t1 = t0 + g->h;
+      const float x0 = IFLOOR(ctx->Current.RasterPos[0] - xorig + epsilon);
+      const float y0 = IFLOOR(ctx->Current.RasterPos[1] - yorig + epsilon);
+      const float x1 = x0 + g->w, y1 = y0 + g->h;
+      const float clip_x0 = x0 * clip_x_scale - 1.0f;
+      const float clip_y0 = y0 * clip_y_scale - 1.0f;
+      const float clip_x1 = x1 * clip_x_scale - 1.0f;
+      const float clip_y1 = y1 * clip_y_scale - 1.0f;
+
+      /* lower-left, lower-right, upper-right, upper-left corners */
+      set_atlas_vertex(verts++, clip_x0, clip_y0, z, color, s0, t0);
+      set_atlas_vertex(verts++, clip_x1, clip_y0, z, color, s1, t0);
+      set_atlas_vertex(verts++, clip_x1, clip_y1, z, color, s1, t1);
+      set_atlas_vertex(verts++, clip_x0, clip_y1, z, color, s0, t1);
+
+      /* Update the raster position */
+      ctx->Current.RasterPos[0] += xmove;
+      ctx->Current.RasterPos[1] += ymove;
+   }
+
+   u_upload_unmap(st->uploader);
+
+   cso_set_vertex_buffers(st->cso_context,
+                          cso_get_aux_vertex_buffer_slot(st->cso_context),
+                          1, &vb);
+
+   cso_draw_arrays(st->cso_context, PIPE_PRIM_QUADS, 0, num_verts);
+
+out:
+   restore_render_state(ctx);
+
+   pipe_resource_reference(&vb.buffer, NULL);
+
+   /* Drop our reference to the sampler view, as st_Bitmap() does. */
+   pipe_sampler_view_reference(&sv, NULL);
+
+   /* We uploaded modified constants, need to invalidate them. */
+   st->dirty.mesa |= _NEW_PROGRAM_CONSTANTS;
+}
+
+
+
/** Per-context init */
void
st_init_bitmap_functions(struct dd_function_table *functions)
{
functions->Bitmap = st_Bitmap;
+ functions->DrawAtlasBitmaps = st_DrawAtlasBitmaps;
}
diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c
index 7b6d10e76b1..55801469f23 100644
--- a/src/mesa/state_tracker/st_cb_clear.c
+++ b/src/mesa/state_tracker/st_cb_clear.c
@@ -44,6 +44,7 @@
#include "st_cb_bitmap.h"
#include "st_cb_clear.h"
#include "st_cb_fbo.h"
+#include "st_draw.h"
#include "st_format.h"
#include "st_program.h"
@@ -55,8 +56,6 @@
#include "util/u_framebuffer.h"
#include "util/u_inlines.h"
#include "util/u_simple_shaders.h"
-#include "util/u_draw_quad.h"
-#include "util/u_upload_mgr.h"
#include "cso_cache/cso_context.h"
@@ -169,67 +168,6 @@ set_vertex_shader_layered(struct st_context *st)
/**
- * Draw a screen-aligned quadrilateral.
- * Coords are clip coords with y=0=bottom.
- */
-static void
-draw_quad(struct st_context *st,
- float x0, float y0, float x1, float y1, GLfloat z,
- unsigned num_instances,
- const union pipe_color_union *color)
-{
- struct cso_context *cso = st->cso_context;
- struct pipe_vertex_buffer vb = {0};
- GLuint i;
- float (*vertices)[2][4]; /**< vertex pos + color */
-
- vb.stride = 8 * sizeof(float);
-
- u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]), 4,
- &vb.buffer_offset, &vb.buffer,
- (void **) &vertices);
- if (!vb.buffer) {
- return;
- }
-
- /* Convert Z from [0,1] to [-1,1] range */
- z = z * 2.0f - 1.0f;
-
- /* positions */
- vertices[0][0][0] = x0;
- vertices[0][0][1] = y0;
-
- vertices[1][0][0] = x1;
- vertices[1][0][1] = y0;
-
- vertices[2][0][0] = x1;
- vertices[2][0][1] = y1;
-
- vertices[3][0][0] = x0;
- vertices[3][0][1] = y1;
-
- /* same for all verts: */
- for (i = 0; i < 4; i++) {
- vertices[i][0][2] = z;
- vertices[i][0][3] = 1.0;
- vertices[i][1][0] = color->f[0];
- vertices[i][1][1] = color->f[1];
- vertices[i][1][2] = color->f[2];
- vertices[i][1][3] = color->f[3];
- }
-
- u_upload_unmap(st->uploader);
-
- /* draw */
- cso_set_vertex_buffers(cso, cso_get_aux_vertex_buffer_slot(cso), 1, &vb);
- cso_draw_arrays_instanced(cso, PIPE_PRIM_TRIANGLE_FAN, 0, 4,
- 0, num_instances);
- pipe_resource_reference(&vb.buffer, NULL);
-}
-
-
-
-/**
* Do glClear by drawing a quadrilateral.
* The vertices of the quad will be computed from the
* ctx->DrawBuffer->_X/Ymin/max fields.
@@ -238,6 +176,7 @@ static void
clear_with_quad(struct gl_context *ctx, unsigned clear_buffers)
{
struct st_context *st = st_context(ctx);
+ struct cso_context *cso = st->cso_context;
const struct gl_framebuffer *fb = ctx->DrawBuffer;
const GLfloat fb_width = (GLfloat) fb->Width;
const GLfloat fb_height = (GLfloat) fb->Height;
@@ -257,21 +196,17 @@ clear_with_quad(struct gl_context *ctx, unsigned clear_buffers)
x1, y1);
*/
- cso_save_blend(st->cso_context);
- cso_save_stencil_ref(st->cso_context);
- cso_save_depth_stencil_alpha(st->cso_context);
- cso_save_rasterizer(st->cso_context);
- cso_save_sample_mask(st->cso_context);
- cso_save_min_samples(st->cso_context);
- cso_save_viewport(st->cso_context);
- cso_save_fragment_shader(st->cso_context);
- cso_save_stream_outputs(st->cso_context);
- cso_save_vertex_shader(st->cso_context);
- cso_save_tessctrl_shader(st->cso_context);
- cso_save_tesseval_shader(st->cso_context);
- cso_save_geometry_shader(st->cso_context);
- cso_save_vertex_elements(st->cso_context);
- cso_save_aux_vertex_buffer_slot(st->cso_context);
+ cso_save_state(cso, (CSO_BIT_BLEND |
+ CSO_BIT_STENCIL_REF |
+ CSO_BIT_DEPTH_STENCIL_ALPHA |
+ CSO_BIT_RASTERIZER |
+ CSO_BIT_SAMPLE_MASK |
+ CSO_BIT_MIN_SAMPLES |
+ CSO_BIT_VIEWPORT |
+ CSO_BIT_STREAM_OUTPUTS |
+ CSO_BIT_VERTEX_ELEMENTS |
+ CSO_BIT_AUX_VERTEX_BUFFER_SLOT |
+ CSO_BITS_ALL_SHADERS));
/* blend state: RGBA masking */
{
@@ -298,10 +233,10 @@ clear_with_quad(struct gl_context *ctx, unsigned clear_buffers)
blend.rt[i].colormask |= PIPE_MASK_A;
}
- if (st->ctx->Color.DitherFlag)
+ if (ctx->Color.DitherFlag)
blend.dither = 1;
}
- cso_set_blend(st->cso_context, &blend);
+ cso_set_blend(cso, &blend);
}
/* depth_stencil state: always pass/set to ref value */
@@ -325,64 +260,49 @@ clear_with_quad(struct gl_context *ctx, unsigned clear_buffers)
depth_stencil.stencil[0].valuemask = 0xff;
depth_stencil.stencil[0].writemask = ctx->Stencil.WriteMask[0] & 0xff;
stencil_ref.ref_value[0] = ctx->Stencil.Clear;
- cso_set_stencil_ref(st->cso_context, &stencil_ref);
+ cso_set_stencil_ref(cso, &stencil_ref);
}
- cso_set_depth_stencil_alpha(st->cso_context, &depth_stencil);
+ cso_set_depth_stencil_alpha(cso, &depth_stencil);
}
- cso_set_vertex_elements(st->cso_context, 2, st->velems_util_draw);
- cso_set_stream_outputs(st->cso_context, 0, NULL, NULL);
- cso_set_sample_mask(st->cso_context, ~0);
- cso_set_min_samples(st->cso_context, 1);
- cso_set_rasterizer(st->cso_context, &st->clear.raster);
+ cso_set_vertex_elements(cso, 2, st->util_velems);
+ cso_set_stream_outputs(cso, 0, NULL, NULL);
+ cso_set_sample_mask(cso, ~0);
+ cso_set_min_samples(cso, 1);
+ cso_set_rasterizer(cso, &st->clear.raster);
/* viewport state: viewport matching window dims */
- {
- const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP);
- struct pipe_viewport_state vp;
- vp.scale[0] = 0.5f * fb_width;
- vp.scale[1] = fb_height * (invert ? -0.5f : 0.5f);
- vp.scale[2] = 0.5f;
- vp.translate[0] = 0.5f * fb_width;
- vp.translate[1] = 0.5f * fb_height;
- vp.translate[2] = 0.5f;
- cso_set_viewport(st->cso_context, &vp);
- }
+ cso_set_viewport_dims(st->cso_context, fb_width, fb_height,
+ st_fb_orientation(fb) == Y_0_TOP);
set_fragment_shader(st);
- cso_set_tessctrl_shader_handle(st->cso_context, NULL);
- cso_set_tesseval_shader_handle(st->cso_context, NULL);
+ cso_set_tessctrl_shader_handle(cso, NULL);
+ cso_set_tesseval_shader_handle(cso, NULL);
if (num_layers > 1)
set_vertex_shader_layered(st);
else
set_vertex_shader(st);
- /* We can't translate the clear color to the colorbuffer format,
+ /* draw quad matching scissor rect.
+ *
+ * Note: if we're only clearing depth/stencil we still setup vertices
+ * with color, but they'll be ignored.
+ *
+ * We can't translate the clear color to the colorbuffer format,
* because different colorbuffers may have different formats.
*/
-
- /* draw quad matching scissor rect */
- draw_quad(st, x0, y0, x1, y1, (GLfloat) ctx->Depth.Clear, num_layers,
- (union pipe_color_union*)&ctx->Color.ClearColor);
+ if (!st_draw_quad(st, x0, y0, x1, y1,
+ ctx->Depth.Clear * 2.0f - 1.0f,
+ 0.0f, 0.0f, 0.0f, 0.0f,
+ (const float *) &ctx->Color.ClearColor.f,
+ num_layers)) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glClear");
+ }
/* Restore pipe state */
- cso_restore_blend(st->cso_context);
- cso_restore_stencil_ref(st->cso_context);
- cso_restore_depth_stencil_alpha(st->cso_context);
- cso_restore_rasterizer(st->cso_context);
- cso_restore_sample_mask(st->cso_context);
- cso_restore_min_samples(st->cso_context);
- cso_restore_viewport(st->cso_context);
- cso_restore_fragment_shader(st->cso_context);
- cso_restore_vertex_shader(st->cso_context);
- cso_restore_tessctrl_shader(st->cso_context);
- cso_restore_tesseval_shader(st->cso_context);
- cso_restore_geometry_shader(st->cso_context);
- cso_restore_vertex_elements(st->cso_context);
- cso_restore_aux_vertex_buffer_slot(st->cso_context);
- cso_restore_stream_outputs(st->cso_context);
+ cso_restore_state(cso);
}
@@ -470,7 +390,7 @@ st_Clear(struct gl_context *ctx, GLbitfield mask)
st_flush_bitmap_cache(st);
/* This makes sure the pipe has the latest scissor, etc values */
- st_validate_state( st );
+ st_validate_state( st, ST_PIPELINE_RENDER );
if (mask & BUFFER_BITS_COLOR) {
for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
diff --git a/src/mesa/state_tracker/st_cb_compute.c b/src/mesa/state_tracker/st_cb_compute.c
new file mode 100644
index 00000000000..364159d62d8
--- /dev/null
+++ b/src/mesa/state_tracker/st_cb_compute.c
@@ -0,0 +1,85 @@
+/**************************************************************************
+ *
+ * Copyright 2016 Samuel Pitoiset
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "main/state.h"
+#include "st_atom.h"
+#include "st_context.h"
+#include "st_cb_bufferobjects.h"
+#include "st_cb_compute.h"
+
+#include "pipe/p_context.h"
+
+/**
+ * Shared body of the direct and indirect compute dispatch hooks.
+ *
+ * Validates compute state if anything is dirty, fills a pipe_grid_info from
+ * the current compute program's local size plus either \p num_groups or the
+ * indirect buffer, and launches the grid.
+ *
+ * \param num_groups       per-dimension group counts, or NULL
+ * \param indirect         indirect parameter buffer, or NULL
+ * \param indirect_offset  offset into \p indirect; only used when it is set
+ */
+static void st_dispatch_compute_common(struct gl_context *ctx,
+                                       const GLuint *num_groups,
+                                       struct pipe_resource *indirect,
+                                       GLintptr indirect_offset)
+{
+   struct gl_shader_program *prog =
+      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
+   struct st_context *st = st_context(ctx);
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_grid_info info = { 0 };
+   unsigned dim;
+
+   if (ctx->NewState)
+      _mesa_update_state(ctx);
+
+   if (st->dirty_cp.st || ctx->NewDriverState)
+      st_validate_state(st, ST_PIPELINE_COMPUTE);
+
+   for (dim = 0; dim < 3; dim++) {
+      info.block[dim] = prog->Comp.LocalSize[dim];
+
+      /* info was zero-initialized, so grid[] stays 0 without num_groups. */
+      if (num_groups)
+         info.grid[dim] = num_groups[dim];
+   }
+
+   if (indirect != NULL) {
+      info.indirect_offset = indirect_offset;
+      info.indirect = indirect;
+   }
+
+   pipe->launch_grid(pipe, &info);
+}
+
+/**
+ * Direct dispatch hook: group counts come from the caller and no indirect
+ * buffer is used.
+ */
+static void st_dispatch_compute(struct gl_context *ctx,
+                                const GLuint *num_groups)
+{
+   struct pipe_resource *const no_indirect = NULL;
+
+   st_dispatch_compute_common(ctx, num_groups, no_indirect, 0);
+}
+
+/**
+ * Indirect dispatch hook: parameters are read from the context's
+ * DispatchIndirectBuffer at \p indirect_offset.
+ */
+static void st_dispatch_compute_indirect(struct gl_context *ctx,
+                                         GLintptr indirect_offset)
+{
+   struct pipe_resource *indirect =
+      st_buffer_object(ctx->DispatchIndirectBuffer)->buffer;
+
+   st_dispatch_compute_common(ctx, NULL, indirect, indirect_offset);
+}
+
+/** Install the compute dispatch hooks into the driver function table. */
+void st_init_compute_functions(struct dd_function_table *functions)
+{
+   functions->DispatchComputeIndirect = st_dispatch_compute_indirect;
+   functions->DispatchCompute = st_dispatch_compute;
+}
diff --git a/src/mesa/state_tracker/st_cb_compute.h b/src/mesa/state_tracker/st_cb_compute.h
new file mode 100644
index 00000000000..78ec756a69e
--- /dev/null
+++ b/src/mesa/state_tracker/st_cb_compute.h
@@ -0,0 +1,38 @@
+/**************************************************************************
+ *
+ * Copyright 2016 Samuel Pitoiset
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef ST_CB_COMPUTE_H
+#define ST_CB_COMPUTE_H
+
+#include "main/compiler.h"
+
+struct dd_function_table;
+
+/** Install the compute dispatch hooks into the driver function table. */
+extern void
+st_init_compute_functions(struct dd_function_table *functions);
+
+#endif /* ST_CB_COMPUTE_H */
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
index fd58886a782..51d4ae51918 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -35,6 +35,7 @@
#include "main/bufferobj.h"
#include "main/blit.h"
#include "main/format_pack.h"
+#include "main/framebuffer.h"
#include "main/macros.h"
#include "main/mtypes.h"
#include "main/pack.h"
@@ -56,6 +57,7 @@
#include "st_cb_fbo.h"
#include "st_context.h"
#include "st_debug.h"
+#include "st_draw.h"
#include "st_format.h"
#include "st_program.h"
#include "st_texture.h"
@@ -63,16 +65,45 @@
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "tgsi/tgsi_ureg.h"
-#include "util/u_draw_quad.h"
#include "util/u_format.h"
#include "util/u_inlines.h"
#include "util/u_math.h"
#include "util/u_tile.h"
-#include "util/u_upload_mgr.h"
#include "cso_cache/cso_context.h"
/**
+ * We have a simple glDrawPixels cache to try to optimize the case where the
+ * same image is drawn over and over again. It basically works as follows:
+ *
+ * 1. After we construct a texture map with the image and draw it, we do
+ * not discard the texture. We keep it around, plus we note the
+ * glDrawPixels width, height, format, etc. parameters and keep a copy
+ * of the image in a malloc'd buffer.
+ *
+ * 2. On the next glDrawPixels we check if the parameters match the previous
+ * call. If those match, we check if the image matches the previous image
+ * via a memcmp() call. If everything matches, we re-use the previous
+ * texture, thereby avoiding the cost of creating a new texture and copying
+ * the image to it.
+ *
+ * The effectiveness of this cache depends upon:
+ * 1. If the memcmp() finds a difference, it happens relatively quickly.
+ * Hopefully, not just the last pixels differ!
+ * 2. If the memcmp() finds no difference, doing that check is faster than
+ * creating and loading a texture.
+ *
+ * Notes:
+ * 1. We don't support any pixel unpacking parameters.
+ * 2. We don't try to cache images in Pixel Buffer Objects.
+ * 3. Instead of saving the whole image, perhaps some sort of reliable
+ * checksum function could be used instead.
+ */
+#define USE_DRAWPIXELS_CACHE 1
+
+
+
+/**
* Create fragment program that does a TEX() instruction to get a Z and/or
* stencil value value, then writes to FRAG_RESULT_DEPTH/FRAG_RESULT_STENCIL.
* Used for glDrawPixels(GL_DEPTH_COMPONENT / GL_STENCIL_INDEX).
@@ -162,22 +193,22 @@ make_passthrough_vertex_shader(struct st_context *st,
return NULL;
/* MOV result.pos, vertex.pos; */
- ureg_MOV(ureg,
+ ureg_MOV(ureg,
ureg_DECL_output( ureg, TGSI_SEMANTIC_POSITION, 0 ),
ureg_DECL_vs_input( ureg, 0 ));
-
- /* MOV result.texcoord0, vertex.attr[1]; */
- ureg_MOV(ureg,
- ureg_DECL_output( ureg, texcoord_semantic, 0 ),
- ureg_DECL_vs_input( ureg, 1 ));
-
+
if (passColor) {
- /* MOV result.color0, vertex.attr[2]; */
- ureg_MOV(ureg,
+ /* MOV result.color0, vertex.attr[1]; */
+ ureg_MOV(ureg,
ureg_DECL_output( ureg, TGSI_SEMANTIC_COLOR, 0 ),
- ureg_DECL_vs_input( ureg, 2 ));
+ ureg_DECL_vs_input( ureg, 1 ));
}
+ /* MOV result.texcoord0, vertex.attr[2]; */
+ ureg_MOV(ureg,
+ ureg_DECL_output( ureg, texcoord_semantic, 0 ),
+ ureg_DECL_vs_input( ureg, 2 ));
+
ureg_END( ureg );
st->drawpix.vert_shaders[passColor] =
@@ -347,6 +378,39 @@ make_texture(struct st_context *st,
enum pipe_format pipeFormat;
GLenum baseInternalFormat;
+#if USE_DRAWPIXELS_CACHE
+ const GLint bpp = _mesa_bytes_per_pixel(format, type);
+
+ /* Check if the glDrawPixels() parameters and state match the cache */
+ if (width == st->drawpix_cache.width &&
+ height == st->drawpix_cache.height &&
+ format == st->drawpix_cache.format &&
+ type == st->drawpix_cache.type &&
+ pixels == st->drawpix_cache.user_pointer &&
+ !_mesa_is_bufferobj(unpack->BufferObj) &&
+ (unpack->RowLength == 0 || unpack->RowLength == width) &&
+ unpack->SkipPixels == 0 &&
+ unpack->SkipRows == 0 &&
+ unpack->SwapBytes == GL_FALSE &&
+ st->drawpix_cache.image) {
+ /* check if the pixel data is the same */
+ if (memcmp(pixels, st->drawpix_cache.image, width * height * bpp) == 0) {
+ /* OK, re-use the cached texture */
+ return st->drawpix_cache.texture;
+ }
+ }
+
+ /* discard the cached image and texture (if there is one) */
+ st->drawpix_cache.width = 0;
+ st->drawpix_cache.height = 0;
+ st->drawpix_cache.user_pointer = NULL;
+ if (st->drawpix_cache.image) {
+ free(st->drawpix_cache.image);
+ st->drawpix_cache.image = NULL;
+ }
+ pipe_resource_reference(&st->drawpix_cache.texture, NULL);
+#endif
+
/* Choose a pixel format for the temp texture which will hold the
* image to draw.
*/
@@ -437,107 +501,29 @@ make_texture(struct st_context *st,
_mesa_unmap_pbo_source(ctx, unpack);
- return pt;
-}
-
-
-/**
- * Draw quad with texcoords and optional color.
- * Coords are gallium window coords with y=0=top.
- * \param color may be null
- * \param invertTex if true, flip texcoords vertically
- */
-static void
-draw_quad(struct gl_context *ctx, GLfloat x0, GLfloat y0, GLfloat z,
- GLfloat x1, GLfloat y1, const GLfloat *color,
- GLboolean invertTex, GLfloat maxXcoord, GLfloat maxYcoord)
-{
- struct st_context *st = st_context(ctx);
- struct pipe_context *pipe = st->pipe;
- GLfloat (*verts)[3][4]; /* four verts, three attribs, XYZW */
- struct pipe_resource *buf = NULL;
- unsigned offset;
-
- u_upload_alloc(st->uploader, 0, 4 * sizeof(verts[0]), 4, &offset,
- &buf, (void **) &verts);
- if (!buf) {
- return;
- }
-
- /* setup vertex data */
- {
- const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
- const GLfloat fb_width = (GLfloat) fb->Width;
- const GLfloat fb_height = (GLfloat) fb->Height;
- const GLfloat clip_x0 = x0 / fb_width * 2.0f - 1.0f;
- const GLfloat clip_y0 = y0 / fb_height * 2.0f - 1.0f;
- const GLfloat clip_x1 = x1 / fb_width * 2.0f - 1.0f;
- const GLfloat clip_y1 = y1 / fb_height * 2.0f - 1.0f;
- const GLfloat sLeft = 0.0f, sRight = maxXcoord;
- const GLfloat tTop = invertTex ? maxYcoord : 0.0f;
- const GLfloat tBot = invertTex ? 0.0f : maxYcoord;
- GLuint i;
-
- /* upper-left */
- verts[0][0][0] = clip_x0; /* v[0].attr[0].x */
- verts[0][0][1] = clip_y0; /* v[0].attr[0].y */
-
- /* upper-right */
- verts[1][0][0] = clip_x1;
- verts[1][0][1] = clip_y0;
-
- /* lower-right */
- verts[2][0][0] = clip_x1;
- verts[2][0][1] = clip_y1;
-
- /* lower-left */
- verts[3][0][0] = clip_x0;
- verts[3][0][1] = clip_y1;
-
- verts[0][1][0] = sLeft; /* v[0].attr[1].S */
- verts[0][1][1] = tTop; /* v[0].attr[1].T */
- verts[1][1][0] = sRight;
- verts[1][1][1] = tTop;
- verts[2][1][0] = sRight;
- verts[2][1][1] = tBot;
- verts[3][1][0] = sLeft;
- verts[3][1][1] = tBot;
-
- /* same for all verts: */
- if (color) {
- for (i = 0; i < 4; i++) {
- verts[i][0][2] = z; /* v[i].attr[0].z */
- verts[i][0][3] = 1.0f; /* v[i].attr[0].w */
- verts[i][2][0] = color[0]; /* v[i].attr[2].r */
- verts[i][2][1] = color[1]; /* v[i].attr[2].g */
- verts[i][2][2] = color[2]; /* v[i].attr[2].b */
- verts[i][2][3] = color[3]; /* v[i].attr[2].a */
- verts[i][1][2] = 0.0f; /* v[i].attr[1].R */
- verts[i][1][3] = 1.0f; /* v[i].attr[1].Q */
- }
- }
- else {
- for (i = 0; i < 4; i++) {
- verts[i][0][2] = z; /*Z*/
- verts[i][0][3] = 1.0f; /*W*/
- verts[i][1][2] = 0.0f; /*R*/
- verts[i][1][3] = 1.0f; /*Q*/
- }
+#if USE_DRAWPIXELS_CACHE
+ /* Save the glDrawPixels parameters and image in the cache */
+ if ((unpack->RowLength == 0 || unpack->RowLength == width) &&
+ unpack->SkipPixels == 0 &&
+ unpack->SkipRows == 0) {
+ st->drawpix_cache.width = width;
+ st->drawpix_cache.height = height;
+ st->drawpix_cache.format = format;
+ st->drawpix_cache.type = type;
+ st->drawpix_cache.user_pointer = pixels;
+ assert(!st->drawpix_cache.image);
+ st->drawpix_cache.image = malloc(width * height * bpp);
+ if (st->drawpix_cache.image) {
+ memcpy(st->drawpix_cache.image, pixels, width * height * bpp);
}
+ st->drawpix_cache.texture = pt;
}
+#endif
- u_upload_unmap(st->uploader);
- util_draw_vertex_buffer(pipe, st->cso_context, buf,
- cso_get_aux_vertex_buffer_slot(st->cso_context),
- offset,
- PIPE_PRIM_QUADS,
- 4, /* verts */
- 3); /* attribs/vert */
- pipe_resource_reference(&buf, NULL);
+ return pt;
}
-
static void
draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
GLsizei width, GLsizei height,
@@ -554,9 +540,12 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
struct st_context *st = st_context(ctx);
struct pipe_context *pipe = st->pipe;
struct cso_context *cso = st->cso_context;
+ const unsigned fb_width = _mesa_geometric_width(ctx->DrawBuffer);
+ const unsigned fb_height = _mesa_geometric_height(ctx->DrawBuffer);
GLfloat x0, y0, x1, y1;
GLsizei maxSize;
boolean normalized = sv[0]->texture->target == PIPE_TEXTURE_2D;
+ unsigned cso_state_mask;
assert(sv[0]->texture->target == st->internal_target);
@@ -569,22 +558,19 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
assert(width <= maxSize);
assert(height <= maxSize);
- cso_save_rasterizer(cso);
- cso_save_viewport(cso);
- cso_save_fragment_samplers(cso);
- cso_save_fragment_sampler_views(cso);
- cso_save_fragment_shader(cso);
- cso_save_stream_outputs(cso);
- cso_save_vertex_shader(cso);
- cso_save_tessctrl_shader(cso);
- cso_save_tesseval_shader(cso);
- cso_save_geometry_shader(cso);
- cso_save_vertex_elements(cso);
- cso_save_aux_vertex_buffer_slot(cso);
+ cso_state_mask = (CSO_BIT_RASTERIZER |
+ CSO_BIT_VIEWPORT |
+ CSO_BIT_FRAGMENT_SAMPLERS |
+ CSO_BIT_FRAGMENT_SAMPLER_VIEWS |
+ CSO_BIT_STREAM_OUTPUTS |
+ CSO_BIT_VERTEX_ELEMENTS |
+ CSO_BIT_AUX_VERTEX_BUFFER_SLOT |
+ CSO_BITS_ALL_SHADERS);
if (write_stencil) {
- cso_save_depth_stencil_alpha(cso);
- cso_save_blend(cso);
+ cso_state_mask |= (CSO_BIT_DEPTH_STENCIL_ALPHA |
+ CSO_BIT_BLEND);
}
+ cso_save_state(cso, cso_state_mask);
/* rasterizer state: just scissor */
{
@@ -694,28 +680,17 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
}
/* viewport state: viewport matching window dims */
- {
- const float w = (float) ctx->DrawBuffer->Width;
- const float h = (float) ctx->DrawBuffer->Height;
- struct pipe_viewport_state vp;
- vp.scale[0] = 0.5f * w;
- vp.scale[1] = -0.5f * h;
- vp.scale[2] = 0.5f;
- vp.translate[0] = 0.5f * w;
- vp.translate[1] = 0.5f * h;
- vp.translate[2] = 0.5f;
- cso_set_viewport(cso, &vp);
- }
+ cso_set_viewport_dims(cso, fb_width, fb_height, TRUE);
- cso_set_vertex_elements(cso, 3, st->velems_util_draw);
- cso_set_stream_outputs(st->cso_context, 0, NULL, NULL);
+ cso_set_vertex_elements(cso, 3, st->util_velems);
+ cso_set_stream_outputs(cso, 0, NULL, NULL);
/* Compute Gallium window coords (y=0=top) with pixel zoom.
* Recall that these coords are transformed by the current
* vertex shader and viewport transformation.
*/
if (st_fb_orientation(ctx->DrawBuffer) == Y_0_BOTTOM) {
- y = ctx->DrawBuffer->Height - (int) (y + height * ctx->Pixel.ZoomY);
+ y = fb_height - (int) (y + height * ctx->Pixel.ZoomY);
invertTex = !invertTex;
}
@@ -727,27 +702,27 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
/* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */
z = z * 2.0f - 1.0f;
- draw_quad(ctx, x0, y0, z, x1, y1, color, invertTex,
- normalized ? ((GLfloat) width / sv[0]->texture->width0) : (GLfloat)width,
- normalized ? ((GLfloat) height / sv[0]->texture->height0) : (GLfloat)height);
+ {
+ const float clip_x0 = x0 / (float) fb_width * 2.0f - 1.0f;
+ const float clip_y0 = y0 / (float) fb_height * 2.0f - 1.0f;
+ const float clip_x1 = x1 / (float) fb_width * 2.0f - 1.0f;
+ const float clip_y1 = y1 / (float) fb_height * 2.0f - 1.0f;
+ const float maxXcoord = normalized ?
+ ((float) width / sv[0]->texture->width0) : (float) width;
+ const float maxYcoord = normalized
+ ? ((float) height / sv[0]->texture->height0) : (float) height;
+ const float sLeft = 0.0f, sRight = maxXcoord;
+ const float tTop = invertTex ? maxYcoord : 0.0f;
+ const float tBot = invertTex ? 0.0f : maxYcoord;
+
+ if (!st_draw_quad(st, clip_x0, clip_y0, clip_x1, clip_y1, z,
+ sLeft, tBot, sRight, tTop, color, 0)) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glDrawPixels");
+ }
+ }
/* restore state */
- cso_restore_rasterizer(cso);
- cso_restore_viewport(cso);
- cso_restore_fragment_samplers(cso);
- cso_restore_fragment_sampler_views(cso);
- cso_restore_fragment_shader(cso);
- cso_restore_vertex_shader(cso);
- cso_restore_tessctrl_shader(cso);
- cso_restore_tesseval_shader(cso);
- cso_restore_geometry_shader(cso);
- cso_restore_vertex_elements(cso);
- cso_restore_aux_vertex_buffer_slot(cso);
- cso_restore_stream_outputs(cso);
- if (write_stencil) {
- cso_restore_depth_stencil_alpha(cso);
- cso_restore_blend(cso);
- }
+ cso_restore_state(cso);
}
@@ -956,7 +931,7 @@ get_color_fp_variant(struct st_context *st)
ctx->Pixel.AlphaScale != 1.0);
key.pixelMaps = ctx->Pixel.MapColorFlag;
key.clamp_color = st->clamp_frag_color_in_shader &&
- st->ctx->Color._ClampFragmentColor;
+ ctx->Color._ClampFragmentColor;
fpv = st_get_fp_variant(st, st->fp, &key);
@@ -1060,7 +1035,6 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
{
void *driver_vp, *driver_fp;
struct st_context *st = st_context(ctx);
- const GLfloat *color;
struct pipe_context *pipe = st->pipe;
GLboolean write_stencil = GL_FALSE, write_depth = GL_FALSE;
struct pipe_sampler_view *sv[2] = { NULL };
@@ -1074,7 +1048,7 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
st_flush_bitmap_cache(st);
- st_validate_state(st);
+ st_validate_state(st, ST_PIPELINE_RENDER);
/* Limit the size of the glDrawPixels to the max texture size.
* Strictly speaking, that's not correct but since we don't handle
@@ -1106,7 +1080,6 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
driver_fp = get_drawpix_z_stencil_program(st, write_depth,
write_stencil);
driver_vp = make_passthrough_vertex_shader(st, GL_TRUE);
- color = ctx->Current.RasterColor;
}
else {
fpv = get_color_fp_variant(st);
@@ -1114,7 +1087,6 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
driver_fp = fpv->driver_shader;
driver_vp = make_passthrough_vertex_shader(st, GL_FALSE);
- color = NULL;
if (ctx->Pixel.MapColorFlag) {
pipe_sampler_view_reference(&sv[1],
st->pixel_xfer.pixelmap_sampler_view);
@@ -1172,12 +1144,15 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
num_sampler_view,
driver_vp,
driver_fp, fpv,
- color, GL_FALSE, write_depth, write_stencil);
+ ctx->Current.RasterColor,
+ GL_FALSE, write_depth, write_stencil);
pipe_sampler_view_reference(&sv[0], NULL);
if (num_sampler_view > 1)
pipe_sampler_view_reference(&sv[1], NULL);
+#if !USE_DRAWPIXELS_CACHE
pipe_resource_reference(&pt, NULL);
+#endif
}
@@ -1427,7 +1402,6 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
struct pipe_sampler_view *sv[2] = { NULL };
struct st_fp_variant *fpv = NULL;
int num_sampler_view = 1;
- GLfloat *color;
enum pipe_format srcFormat;
unsigned srcBind;
GLboolean invertTex = GL_FALSE;
@@ -1436,7 +1410,7 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
st_flush_bitmap_cache(st);
- st_validate_state(st);
+ st_validate_state(st, ST_PIPELINE_RENDER);
if (type == GL_DEPTH_STENCIL) {
/* XXX make this more efficient */
@@ -1469,7 +1443,6 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
fpv = get_color_fp_variant(st);
rbRead = st_get_color_read_renderbuffer(ctx);
- color = NULL;
driver_fp = fpv->driver_shader;
driver_vp = make_passthrough_vertex_shader(st, GL_FALSE);
@@ -1490,7 +1463,6 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
assert(type == GL_DEPTH);
rbRead = st_renderbuffer(ctx->ReadBuffer->
Attachment[BUFFER_DEPTH].Renderbuffer);
- color = ctx->Current.Attrib[VERT_ATTRIB_COLOR0];
driver_fp = get_drawpix_z_stencil_program(st, GL_TRUE, GL_FALSE);
driver_vp = make_passthrough_vertex_shader(st, GL_TRUE);
@@ -1622,7 +1594,8 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
num_sampler_view,
driver_vp,
driver_fp, fpv,
- color, invertTex, GL_FALSE, GL_FALSE);
+ ctx->Current.Attrib[VERT_ATTRIB_COLOR0],
+ invertTex, GL_FALSE, GL_FALSE);
pipe_resource_reference(&pt, NULL);
pipe_sampler_view_reference(&sv[0], NULL);
diff --git a/src/mesa/state_tracker/st_cb_drawtex.c b/src/mesa/state_tracker/st_cb_drawtex.c
index e6ab77fb521..a7926295277 100644
--- a/src/mesa/state_tracker/st_cb_drawtex.c
+++ b/src/mesa/state_tracker/st_cb_drawtex.c
@@ -49,6 +49,7 @@ struct cached_shader
/**
* Simple linear list cache.
* Most of the time there'll only be one cached shader.
+ * XXX This should be per-st_context state.
*/
static struct cached_shader CachedShaders[MAX_SHADERS];
static GLuint NumCachedShaders = 0;
@@ -99,13 +100,14 @@ lookup_shader(struct pipe_context *pipe,
return CachedShaders[i].handle;
}
+
static void
st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
GLfloat width, GLfloat height)
{
struct st_context *st = ctx->st;
struct pipe_context *pipe = st->pipe;
- struct cso_context *cso = ctx->st->cso_context;
+ struct cso_context *cso = st->cso_context;
struct pipe_resource *vbuffer = NULL;
GLuint i, numTexCoords, numAttribs;
GLboolean emitColor;
@@ -116,7 +118,7 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
st_flush_bitmap_cache(st);
- st_validate_state(st);
+ st_validate_state(st, ST_PIPELINE_RENDER);
/* determine if we need vertex color */
if (ctx->FragmentProgram._Current->Base.InputsRead & VARYING_BIT_COL0)
@@ -150,7 +152,7 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
const GLfloat x0 = x, y0 = y, x1 = x + width, y1 = y + height;
GLfloat *vbuf = NULL;
- GLuint attr;
+ GLuint tex_attr;
u_upload_alloc(st->uploader, 0,
numAttribs * 4 * 4 * sizeof(GLfloat), 4,
@@ -158,12 +160,12 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
if (!vbuffer) {
return;
}
-
+
z = CLAMP(z, 0.0f, 1.0f);
/* positions (in clip coords) */
{
- const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
+ const struct gl_framebuffer *fb = ctx->DrawBuffer;
const GLfloat fb_width = (GLfloat)fb->Width;
const GLfloat fb_height = (GLfloat)fb->Height;
@@ -190,10 +192,10 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
SET_ATTRIB(3, 1, c[0], c[1], c[2], c[3]);
semantic_names[1] = TGSI_SEMANTIC_COLOR;
semantic_indexes[1] = 0;
- attr = 2;
+ tex_attr = 2;
}
else {
- attr = 1;
+ tex_attr = 1;
}
/* texcoords */
@@ -210,17 +212,17 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
const GLfloat t1 = (obj->CropRect[1] + obj->CropRect[3]) / ht;
/*printf("crop texcoords: %g, %g .. %g, %g\n", s0, t0, s1, t1);*/
- SET_ATTRIB(0, attr, s0, t0, 0.0f, 1.0f); /* lower left */
- SET_ATTRIB(1, attr, s1, t0, 0.0f, 1.0f); /* lower right */
- SET_ATTRIB(2, attr, s1, t1, 0.0f, 1.0f); /* upper right */
- SET_ATTRIB(3, attr, s0, t1, 0.0f, 1.0f); /* upper left */
+ SET_ATTRIB(0, tex_attr, s0, t0, 0.0f, 1.0f); /* lower left */
+ SET_ATTRIB(1, tex_attr, s1, t0, 0.0f, 1.0f); /* lower right */
+ SET_ATTRIB(2, tex_attr, s1, t1, 0.0f, 1.0f); /* upper right */
+ SET_ATTRIB(3, tex_attr, s0, t1, 0.0f, 1.0f); /* upper left */
- semantic_names[attr] = st->needs_texcoord_semantic ?
+ semantic_names[tex_attr] = st->needs_texcoord_semantic ?
TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
/* XXX: should this use semantic index i instead of 0 ? */
- semantic_indexes[attr] = 0;
+ semantic_indexes[tex_attr] = 0;
- attr++;
+ tex_attr++;
}
}
@@ -229,15 +231,14 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
#undef SET_ATTRIB
}
-
- cso_save_viewport(cso);
- cso_save_stream_outputs(cso);
- cso_save_vertex_shader(cso);
- cso_save_tessctrl_shader(cso);
- cso_save_tesseval_shader(cso);
- cso_save_geometry_shader(cso);
- cso_save_vertex_elements(cso);
- cso_save_aux_vertex_buffer_slot(cso);
+ cso_save_state(cso, (CSO_BIT_VIEWPORT |
+ CSO_BIT_STREAM_OUTPUTS |
+ CSO_BIT_VERTEX_SHADER |
+ CSO_BIT_TESSCTRL_SHADER |
+ CSO_BIT_TESSEVAL_SHADER |
+ CSO_BIT_GEOMETRY_SHADER |
+ CSO_BIT_VERTEX_ELEMENTS |
+ CSO_BIT_AUX_VERTEX_BUFFER_SLOT));
{
void *vs = lookup_shader(pipe, numAttribs,
@@ -255,11 +256,11 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
velements[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
}
cso_set_vertex_elements(cso, numAttribs, velements);
- cso_set_stream_outputs(st->cso_context, 0, NULL, NULL);
+ cso_set_stream_outputs(cso, 0, NULL, NULL);
/* viewport state: viewport matching window dims */
{
- const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
+ const struct gl_framebuffer *fb = ctx->DrawBuffer;
const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP);
const GLfloat width = (GLfloat)fb->Width;
const GLfloat height = (GLfloat)fb->Height;
@@ -273,7 +274,6 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
cso_set_viewport(cso, &vp);
}
-
util_draw_vertex_buffer(pipe, cso, vbuffer,
cso_get_aux_vertex_buffer_slot(cso),
offset, /* offset */
@@ -281,18 +281,10 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
4, /* verts */
numAttribs); /* attribs/vert */
-
pipe_resource_reference(&vbuffer, NULL);
/* restore state */
- cso_restore_viewport(cso);
- cso_restore_vertex_shader(cso);
- cso_restore_tessctrl_shader(cso);
- cso_restore_tesseval_shader(cso);
- cso_restore_geometry_shader(cso);
- cso_restore_vertex_elements(cso);
- cso_restore_aux_vertex_buffer_slot(cso);
- cso_restore_stream_outputs(cso);
+ cso_restore_state(cso);
}
diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c
index 2a2eb0992c8..82ab914503b 100644
--- a/src/mesa/state_tracker/st_cb_fbo.c
+++ b/src/mesa/state_tracker/st_cb_fbo.c
@@ -44,6 +44,7 @@
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
+#include "st_atom.h"
#include "st_context.h"
#include "st_cb_fbo.h"
#include "st_cb_flush.h"
@@ -711,9 +712,17 @@ st_ReadBuffer(struct gl_context *ctx, GLenum buffer)
(void) buffer;
- /* add the renderbuffer on demand */
- if (fb->_ColorReadBufferIndex >= 0)
+ /* Check if we need to allocate a front color buffer.
+ * Front buffers are often allocated on demand (other color buffers are
+ * always allocated in advance).
+ */
+ if ((fb->_ColorReadBufferIndex == BUFFER_FRONT_LEFT ||
+ fb->_ColorReadBufferIndex == BUFFER_FRONT_RIGHT) &&
+ fb->Attachment[fb->_ColorReadBufferIndex].Type == GL_NONE) {
+ /* add the buffer */
st_manager_add_color_renderbuffer(st, fb, fb->_ColorReadBufferIndex);
+ st_validate_state(st, ST_PIPELINE_RENDER);
+ }
}
diff --git a/src/mesa/state_tracker/st_cb_msaa.c b/src/mesa/state_tracker/st_cb_msaa.c
index e9955b62b8f..d581f2121b0 100644
--- a/src/mesa/state_tracker/st_cb_msaa.c
+++ b/src/mesa/state_tracker/st_cb_msaa.c
@@ -44,7 +44,7 @@ st_GetSamplePosition(struct gl_context *ctx,
{
struct st_context *st = st_context(ctx);
- st_validate_state(st);
+ st_validate_state(st, ST_PIPELINE_RENDER);
if (st->pipe->get_sample_position)
st->pipe->get_sample_position(st->pipe, (unsigned) fb->Visual.samples,
diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c
index ca493d84715..27cc0f3d154 100644
--- a/src/mesa/state_tracker/st_cb_program.c
+++ b/src/mesa/state_tracker/st_cb_program.c
@@ -74,6 +74,9 @@ st_bind_program(struct gl_context *ctx, GLenum target, struct gl_program *prog)
case GL_TESS_EVALUATION_PROGRAM_NV:
st->dirty.st |= ST_NEW_TESSEVAL_PROGRAM;
break;
+ case GL_COMPUTE_PROGRAM_NV:
+ st->dirty_cp.st |= ST_NEW_COMPUTE_PROGRAM;
+ break;
}
}
@@ -92,6 +95,7 @@ st_use_program(struct gl_context *ctx, struct gl_shader_program *shProg)
st->dirty.st |= ST_NEW_GEOMETRY_PROGRAM;
st->dirty.st |= ST_NEW_TESSCTRL_PROGRAM;
st->dirty.st |= ST_NEW_TESSEVAL_PROGRAM;
+ st->dirty_cp.st |= ST_NEW_COMPUTE_PROGRAM;
}
@@ -123,6 +127,10 @@ st_new_program(struct gl_context *ctx, GLenum target, GLuint id)
struct st_tesseval_program *prog = ST_CALLOC_STRUCT(st_tesseval_program);
return _mesa_init_gl_program(&prog->Base.Base, target, id);
}
+ case GL_COMPUTE_PROGRAM_NV: {
+ struct st_compute_program *prog = ST_CALLOC_STRUCT(st_compute_program);
+ return _mesa_init_gl_program(&prog->Base.Base, target, id);
+ }
default:
assert(0);
return NULL;
@@ -195,6 +203,17 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog)
free_glsl_to_tgsi_visitor(sttep->glsl_to_tgsi);
}
break;
+ case GL_COMPUTE_PROGRAM_NV:
+ {
+ struct st_compute_program *stcp =
+ (struct st_compute_program *) prog;
+
+ st_release_cp_variants(st, stcp);
+
+ if (stcp->glsl_to_tgsi)
+ free_glsl_to_tgsi_visitor(stcp->glsl_to_tgsi);
+ }
+ break;
default:
assert(0); /* problem */
}
@@ -272,6 +291,17 @@ st_program_string_notify( struct gl_context *ctx,
if (st->tep == sttep)
st->dirty.st |= ST_NEW_TESSEVAL_PROGRAM;
}
+ else if (target == GL_COMPUTE_PROGRAM_NV) {
+ struct st_compute_program *stcp =
+ (struct st_compute_program *) prog;
+
+ st_release_cp_variants(st, stcp);
+ if (!st_translate_compute_program(st, stcp))
+ return false;
+
+ if (st->cp == stcp)
+ st->dirty_cp.st |= ST_NEW_COMPUTE_PROGRAM;
+ }
if (ST_DEBUG & DEBUG_PRECOMPILE ||
st->shader_has_one_variant[stage])
diff --git a/src/mesa/state_tracker/st_cb_rasterpos.c b/src/mesa/state_tracker/st_cb_rasterpos.c
index 747b41464ae..eec72f8a412 100644
--- a/src/mesa/state_tracker/st_cb_rasterpos.c
+++ b/src/mesa/state_tracker/st_cb_rasterpos.c
@@ -248,7 +248,7 @@ st_RasterPos(struct gl_context *ctx, const GLfloat v[4])
draw_set_rasterize_stage(st->draw, st->rastpos_stage);
/* make sure everything's up to date */
- st_validate_state(st);
+ st_validate_state(st, ST_PIPELINE_RENDER);
/* This will get set only if rastpos_point(), above, gets called */
ctx->Current.RasterPosValid = GL_FALSE;
diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c
index bb36e6969d6..5153c4bbba1 100644
--- a/src/mesa/state_tracker/st_cb_readpixels.c
+++ b/src/mesa/state_tracker/st_cb_readpixels.c
@@ -81,7 +81,7 @@ needs_integer_signed_unsigned_conversion(const struct gl_context *ctx,
* we do here should be free in such cases.
*/
static void
-st_readpixels(struct gl_context *ctx, GLint x, GLint y,
+st_ReadPixels(struct gl_context *ctx, GLint x, GLint y,
GLsizei width, GLsizei height,
GLenum format, GLenum type,
const struct gl_pixelstore_attrib *pack,
@@ -104,7 +104,7 @@ st_readpixels(struct gl_context *ctx, GLint x, GLint y,
/* Validate state (to be sure we have up-to-date framebuffer surfaces)
* and flush the bitmap cache prior to reading. */
- st_validate_state(st);
+ st_validate_state(st, ST_PIPELINE_RENDER);
st_flush_bitmap_cache(st);
if (!st->prefer_blit_based_texture_transfer) {
@@ -257,5 +257,5 @@ fallback:
void st_init_readpixels_functions(struct dd_function_table *functions)
{
- functions->ReadPixels = st_readpixels;
+ functions->ReadPixels = st_ReadPixels;
}
diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index d53126a9441..cfec627f10c 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -1272,10 +1272,11 @@ create_pbo_upload_fs(struct st_context *st)
ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X));
}
+ /* temp0.w = 0 */
+ ureg_MOV(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_W), ureg_imm1u(ureg, 0));
+
/* out = txf(sampler, temp0.x) */
- ureg_TXF(ureg, out, TGSI_TEXTURE_BUFFER,
- ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X),
- sampler);
+ ureg_TXF(ureg, out, TGSI_TEXTURE_BUFFER, ureg_src(temp0), sampler);
ureg_release_temporary(ureg, temp0);
@@ -1297,6 +1298,7 @@ try_pbo_upload_common(struct gl_context *ctx,
unsigned image_height)
{
struct st_context *st = st_context(ctx);
+ struct cso_context *cso = st->cso_context;
struct pipe_context *pipe = st->pipe;
unsigned depth = surface->u.tex.last_layer - surface->u.tex.first_layer + 1;
unsigned skip_pixels = 0;
@@ -1333,6 +1335,20 @@ try_pbo_upload_common(struct gl_context *ctx,
return false;
}
+ cso_save_state(cso, (CSO_BIT_FRAGMENT_SAMPLER_VIEWS |
+ CSO_BIT_FRAGMENT_SAMPLERS |
+ CSO_BIT_VERTEX_ELEMENTS |
+ CSO_BIT_AUX_VERTEX_BUFFER_SLOT |
+ CSO_BIT_FRAMEBUFFER |
+ CSO_BIT_VIEWPORT |
+ CSO_BIT_BLEND |
+ CSO_BIT_DEPTH_STENCIL_ALPHA |
+ CSO_BIT_RASTERIZER |
+ CSO_BIT_STREAM_OUTPUTS |
+ CSO_BITS_ALL_SHADERS));
+ cso_save_constant_buffer_slot0(cso, PIPE_SHADER_FRAGMENT);
+
+
/* Set up the sampler_view */
{
unsigned first_element = buf_offset;
@@ -1340,14 +1356,17 @@ try_pbo_upload_common(struct gl_context *ctx,
+ (upload_height - 1 + (depth - 1) * image_height) * stride;
struct pipe_sampler_view templ;
struct pipe_sampler_view *sampler_view;
+ struct pipe_sampler_state sampler = {0};
+ const struct pipe_sampler_state *samplers[1] = {&sampler};
/* This should be ensured by Mesa before calling our callbacks */
assert((last_element + 1) * bytes_per_pixel <= buffer->width0);
if (last_element - first_element > ctx->Const.MaxTextureBufferSize - 1)
- return false;
+ goto fail;
memset(&templ, 0, sizeof(templ));
+ templ.target = PIPE_BUFFER;
templ.format = src_format;
templ.u.buf.first_element = first_element;
templ.u.buf.last_element = last_element;
@@ -1358,13 +1377,13 @@ try_pbo_upload_common(struct gl_context *ctx,
sampler_view = pipe->create_sampler_view(pipe, buffer, &templ);
if (sampler_view == NULL)
- return false;
+ goto fail;
- cso_save_fragment_sampler_views(st->cso_context);
- cso_set_sampler_views(st->cso_context, PIPE_SHADER_FRAGMENT, 1,
- &sampler_view);
+ cso_set_sampler_views(cso, PIPE_SHADER_FRAGMENT, 1, &sampler_view);
pipe_sampler_view_reference(&sampler_view, NULL);
+
+ cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, 1, samplers);
}
/* Upload vertices */
@@ -1386,7 +1405,7 @@ try_pbo_upload_common(struct gl_context *ctx,
u_upload_alloc(st->uploader, 0, 8 * sizeof(float), 4,
&vbo.buffer_offset, &vbo.buffer, (void **) &verts);
if (!verts)
- goto fail_vertex_upload;
+ goto fail;
verts[0] = x0;
verts[1] = y0;
@@ -1401,30 +1420,28 @@ try_pbo_upload_common(struct gl_context *ctx,
velem.src_offset = 0;
velem.instance_divisor = 0;
- velem.vertex_buffer_index = cso_get_aux_vertex_buffer_slot(st->cso_context);
+ velem.vertex_buffer_index = cso_get_aux_vertex_buffer_slot(cso);
velem.src_format = PIPE_FORMAT_R32G32_FLOAT;
- cso_save_vertex_elements(st->cso_context);
- cso_set_vertex_elements(st->cso_context, 1, &velem);
+ cso_set_vertex_elements(cso, 1, &velem);
- cso_save_aux_vertex_buffer_slot(st->cso_context);
- cso_set_vertex_buffers(st->cso_context, velem.vertex_buffer_index,
- 1, &vbo);
+ cso_set_vertex_buffers(cso, velem.vertex_buffer_index, 1, &vbo);
pipe_resource_reference(&vbo.buffer, NULL);
}
/* Upload constants */
+ /* Note: the user buffer must be valid until draw time */
+ struct {
+ int32_t xoffset;
+ int32_t yoffset;
+ int32_t stride;
+ int32_t image_size;
+ } constants;
+
{
struct pipe_constant_buffer cb;
- struct {
- int32_t xoffset;
- int32_t yoffset;
- int32_t stride;
- int32_t image_size;
- } constants;
-
constants.xoffset = -xoffset + skip_pixels;
constants.yoffset = -yoffset;
constants.stride = stride;
@@ -1434,10 +1451,10 @@ try_pbo_upload_common(struct gl_context *ctx,
cb.buffer = NULL;
cb.user_buffer = NULL;
u_upload_data(st->constbuf_uploader, 0, sizeof(constants),
- st->ctx->Const.UniformBufferOffsetAlignment,
+ ctx->Const.UniformBufferOffsetAlignment,
&constants, &cb.buffer_offset, &cb.buffer);
if (!cb.buffer)
- goto fail_constant_upload;
+ goto fail;
u_upload_unmap(st->constbuf_uploader);
} else {
@@ -1447,8 +1464,7 @@ try_pbo_upload_common(struct gl_context *ctx,
}
cb.buffer_size = sizeof(constants);
- cso_save_constant_buffer_slot0(st->cso_context, PIPE_SHADER_FRAGMENT);
- cso_set_constant_buffer(st->cso_context, PIPE_SHADER_FRAGMENT, 0, &cb);
+ cso_set_constant_buffer(cso, PIPE_SHADER_FRAGMENT, 0, &cb);
pipe_resource_reference(&cb.buffer, NULL);
}
@@ -1462,80 +1478,52 @@ try_pbo_upload_common(struct gl_context *ctx,
fb.nr_cbufs = 1;
pipe_surface_reference(&fb.cbufs[0], surface);
- cso_save_framebuffer(st->cso_context);
- cso_set_framebuffer(st->cso_context, &fb);
+ cso_set_framebuffer(cso, &fb);
pipe_surface_reference(&fb.cbufs[0], NULL);
}
- /* Viewport state */
- {
- struct pipe_viewport_state vp;
- vp.scale[0] = 0.5f * surface->width;
- vp.scale[1] = 0.5f * surface->height;
- vp.scale[2] = 1.0f;
- vp.translate[0] = 0.5f * surface->width;
- vp.translate[1] = 0.5f * surface->height;
- vp.translate[2] = 0.0f;
-
- cso_save_viewport(st->cso_context);
- cso_set_viewport(st->cso_context, &vp);
- }
+ cso_set_viewport_dims(cso, surface->width, surface->height, FALSE);
/* Blend state */
- cso_save_blend(st->cso_context);
- cso_set_blend(st->cso_context, &st->pbo_upload.blend);
+ cso_set_blend(cso, &st->pbo_upload.blend);
+
+ /* Depth/stencil/alpha state */
+ {
+ struct pipe_depth_stencil_alpha_state dsa;
+ memset(&dsa, 0, sizeof(dsa));
+ cso_set_depth_stencil_alpha(cso, &dsa);
+ }
/* Rasterizer state */
- cso_save_rasterizer(st->cso_context);
- cso_set_rasterizer(st->cso_context, &st->pbo_upload.raster);
+ cso_set_rasterizer(cso, &st->pbo_upload.raster);
/* Set up the shaders */
- cso_save_vertex_shader(st->cso_context);
- cso_set_vertex_shader_handle(st->cso_context, st->pbo_upload.vs);
+ cso_set_vertex_shader_handle(cso, st->pbo_upload.vs);
- cso_save_geometry_shader(st->cso_context);
- cso_set_geometry_shader_handle(st->cso_context,
- depth != 1 ? st->pbo_upload.gs : NULL);
+ cso_set_geometry_shader_handle(cso, depth != 1 ? st->pbo_upload.gs : NULL);
- cso_save_tessctrl_shader(st->cso_context);
- cso_set_tessctrl_shader_handle(st->cso_context, NULL);
+ cso_set_tessctrl_shader_handle(cso, NULL);
- cso_save_tesseval_shader(st->cso_context);
- cso_set_tesseval_shader_handle(st->cso_context, NULL);
+ cso_set_tesseval_shader_handle(cso, NULL);
- cso_save_fragment_shader(st->cso_context);
- cso_set_fragment_shader_handle(st->cso_context, st->pbo_upload.fs);
+ cso_set_fragment_shader_handle(cso, st->pbo_upload.fs);
/* Disable stream output */
- cso_save_stream_outputs(st->cso_context);
- cso_set_stream_outputs(st->cso_context, 0, NULL, 0);
+ cso_set_stream_outputs(cso, 0, NULL, 0);
if (depth == 1) {
- cso_draw_arrays(st->cso_context, PIPE_PRIM_TRIANGLE_STRIP, 0, 4);
+ cso_draw_arrays(cso, PIPE_PRIM_TRIANGLE_STRIP, 0, 4);
} else {
- cso_draw_arrays_instanced(st->cso_context, PIPE_PRIM_TRIANGLE_STRIP,
+ cso_draw_arrays_instanced(cso, PIPE_PRIM_TRIANGLE_STRIP,
0, 4, 0, depth);
}
success = true;
- cso_restore_framebuffer(st->cso_context);
- cso_restore_viewport(st->cso_context);
- cso_restore_blend(st->cso_context);
- cso_restore_rasterizer(st->cso_context);
- cso_restore_vertex_shader(st->cso_context);
- cso_restore_geometry_shader(st->cso_context);
- cso_restore_tessctrl_shader(st->cso_context);
- cso_restore_tesseval_shader(st->cso_context);
- cso_restore_fragment_shader(st->cso_context);
- cso_restore_stream_outputs(st->cso_context);
- cso_restore_constant_buffer_slot0(st->cso_context, PIPE_SHADER_FRAGMENT);
-fail_constant_upload:
- cso_restore_vertex_elements(st->cso_context);
- cso_restore_aux_vertex_buffer_slot(st->cso_context);
-fail_vertex_upload:
- cso_restore_fragment_sampler_views(st->cso_context);
+fail:
+ cso_restore_state(cso);
+ cso_restore_constant_buffer_slot0(cso, PIPE_SHADER_FRAGMENT);
return success;
}
@@ -2752,7 +2740,7 @@ st_finalize_texture(struct gl_context *ctx,
{
struct st_context *st = st_context(ctx);
struct st_texture_object *stObj = st_texture_object(tObj);
- const GLuint nr_faces = (stObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+ const GLuint nr_faces = _mesa_num_tex_faces(stObj->base.Target);
GLuint face;
const struct st_texture_image *firstImage;
enum pipe_format firstImageFormat;
diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
index 9016846b148..e3ddee660f7 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -43,6 +43,7 @@
#include "st_cb_blit.h"
#include "st_cb_bufferobjects.h"
#include "st_cb_clear.h"
+#include "st_cb_compute.h"
#include "st_cb_condrender.h"
#include "st_cb_copyimage.h"
#include "st_cb_drawpixels.h"
@@ -138,8 +139,11 @@ void st_invalidate_state(struct gl_context * ctx, GLbitfield new_state)
st->dirty.st |= ST_NEW_VERTEX_PROGRAM;
}
+ /* Invalidate render and compute pipelines. */
st->dirty.mesa |= new_state;
st->dirty.st |= ST_NEW_MESA;
+ st->dirty_cp.mesa |= new_state;
+ st->dirty_cp.st |= ST_NEW_MESA;
/* This is the only core Mesa module we depend upon.
* No longer use swrast, swsetup, tnl.
@@ -182,6 +186,10 @@ st_destroy_context_priv(struct st_context *st)
u_upload_destroy(st->constbuf_uploader);
}
+ /* free glDrawPixels cache data */
+ free(st->drawpix_cache.image);
+ pipe_resource_reference(&st->drawpix_cache.texture, NULL);
+
cso_destroy_context(st->cso_context);
free( st );
}
@@ -208,8 +216,11 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe,
/* state tracker needs the VBO module */
_vbo_CreateContext(ctx);
+ /* Initialize render and compute pipelines flags */
st->dirty.mesa = ~0;
st->dirty.st = ~0;
+ st->dirty_cp.mesa = ~0;
+ st->dirty_cp.st = ~0;
/* Create upload manager for vertex data for glBitmap, glDrawPixels,
* glClear, etc.
@@ -241,16 +252,30 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe,
else
st->internal_target = PIPE_TEXTURE_RECT;
- /* Vertex element objects used for drawing rectangles for glBitmap,
- * glDrawPixels, glClear, etc.
+ /* Setup vertex element info for 'struct st_util_vertex'.
*/
- for (i = 0; i < ARRAY_SIZE(st->velems_util_draw); i++) {
- memset(&st->velems_util_draw[i], 0, sizeof(struct pipe_vertex_element));
- st->velems_util_draw[i].src_offset = i * 4 * sizeof(float);
- st->velems_util_draw[i].instance_divisor = 0;
- st->velems_util_draw[i].vertex_buffer_index =
- cso_get_aux_vertex_buffer_slot(st->cso_context);
- st->velems_util_draw[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ {
+ const unsigned slot = cso_get_aux_vertex_buffer_slot(st->cso_context);
+
+ /* If this assertion ever fails all state tracker calls to
+ * cso_get_aux_vertex_buffer_slot() should be audited. This
+ * particular call would have to be moved to just before each
+ * drawing call.
+ */
+ assert(slot == 0);
+
+ STATIC_ASSERT(sizeof(struct st_util_vertex) == 9 * sizeof(float));
+
+ memset(&st->util_velems, 0, sizeof(st->util_velems));
+ st->util_velems[0].src_offset = 0;
+ st->util_velems[0].vertex_buffer_index = slot;
+ st->util_velems[0].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
+ st->util_velems[1].src_offset = 3 * sizeof(float);
+ st->util_velems[1].vertex_buffer_index = slot;
+ st->util_velems[1].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ st->util_velems[2].src_offset = 7 * sizeof(float);
+ st->util_velems[2].vertex_buffer_index = slot;
+ st->util_velems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
}
/* we want all vertex data to be placed in buffer objects */
@@ -262,9 +287,9 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe,
/* Need these flags:
*/
- st->ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
+ ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
- st->ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
+ ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
st->has_stencil_export =
screen->get_param(screen, PIPE_CAP_SHADER_STENCIL_EXPORT);
@@ -328,8 +353,8 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe,
/* called after _mesa_create_context/_mesa_init_point, fix default user
* settable max point size up
*/
- st->ctx->Point.MaxSize = MAX2(ctx->Const.MaxPointSize,
- ctx->Const.MaxPointSizeAA);
+ ctx->Point.MaxSize = MAX2(ctx->Const.MaxPointSize,
+ ctx->Const.MaxPointSizeAA);
/* For vertex shaders, make sure not to emit saturate when SM 3.0 is not supported */
ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoSat = !st->has_shader_model3;
@@ -377,6 +402,7 @@ static void st_init_driver_flags(struct gl_driver_flags *f)
f->NewTextureBuffer = ST_NEW_SAMPLER_VIEWS;
f->NewAtomicBuffer = ST_NEW_ATOMIC_BUFFER;
f->NewShaderStorageBuffer = ST_NEW_STORAGE_BUFFER;
+ f->NewImageUnits = ST_NEW_IMAGE_UNITS;
}
struct st_context *st_create_context(gl_api api, struct pipe_context *pipe,
@@ -440,6 +466,7 @@ void st_destroy_context( struct st_context *st )
st_reference_vertprog(st, &st->vp, NULL);
st_reference_tesscprog(st, &st->tcp, NULL);
st_reference_tesseprog(st, &st->tep, NULL);
+ st_reference_compprog(st, &st->cp, NULL);
/* release framebuffer surfaces */
for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
@@ -449,7 +476,7 @@ void st_destroy_context( struct st_context *st )
pipe_sampler_view_reference(&st->pixel_xfer.pixelmap_sampler_view, NULL);
pipe_resource_reference(&st->pixel_xfer.pixelmap_texture, NULL);
- _vbo_DestroyContext(st->ctx);
+ _vbo_DestroyContext(ctx);
st_destroy_program_variants(st);
@@ -503,6 +530,7 @@ void st_init_driver_functions(struct pipe_screen *screen,
st_init_flush_functions(screen, functions);
st_init_string_functions(functions);
st_init_viewport_functions(functions);
+ st_init_compute_functions(functions);
st_init_xformfb_functions(functions);
st_init_syncobj_functions(functions);
diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h
index 9a80f4bae70..f960c64cbe8 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -64,6 +64,8 @@ struct u_upload_mgr;
#define ST_NEW_SAMPLER_VIEWS (1 << 11)
#define ST_NEW_ATOMIC_BUFFER (1 << 12)
#define ST_NEW_STORAGE_BUFFER (1 << 13)
+#define ST_NEW_COMPUTE_PROGRAM (1 << 14)
+#define ST_NEW_IMAGE_UNITS (1 << 15)
struct st_state_flags {
@@ -78,6 +80,23 @@ struct st_tracked_state {
};
+/**
+ * Enumeration of state tracker pipelines.
+ */
+enum st_pipeline {
+ ST_PIPELINE_RENDER,
+ ST_PIPELINE_COMPUTE,
+};
+
+
+/** For drawing quads for glClear, glDraw/CopyPixels, glBitmap, etc. */
+struct st_util_vertex
+{
+ float x, y, z;
+ float r, g, b, a;
+ float s, t;
+};
+
struct st_context
{
@@ -153,6 +172,7 @@ struct st_context
char renderer[100];
struct st_state_flags dirty;
+ struct st_state_flags dirty_cp;
GLboolean vertdata_edgeflags;
GLboolean edgeflag_culls_prims;
@@ -165,12 +185,14 @@ struct st_context
struct st_geometry_program *gp; /**< Currently bound geometry program */
struct st_tessctrl_program *tcp; /**< Currently bound tess control program */
struct st_tesseval_program *tep; /**< Currently bound tess eval program */
+ struct st_compute_program *cp; /**< Currently bound compute program */
struct st_vp_variant *vp_variant;
struct st_fp_variant *fp_variant;
struct st_basic_variant *gp_variant;
struct st_basic_variant *tcp_variant;
struct st_basic_variant *tep_variant;
+ struct st_basic_variant *cp_variant;
struct gl_texture_object *default_texture;
@@ -183,6 +205,7 @@ struct st_context
struct {
struct pipe_rasterizer_state rasterizer;
struct pipe_sampler_state sampler;
+ struct pipe_sampler_state atlas_sampler;
enum pipe_format tex_format;
void *vs;
struct bitmap_cache *cache;
@@ -194,6 +217,14 @@ struct st_context
void *vert_shaders[2]; /**< ureg shaders */
} drawpix;
+ struct {
+ GLsizei width, height;
+ GLenum format, type;
+ const void *user_pointer; /**< Last user 'pixels' pointer */
+ void *image; /**< Copy of the glDrawPixels image data */
+ struct pipe_resource *texture;
+ } drawpix_cache;
+
/** for glClear */
struct {
struct pipe_rasterizer_state raster;
@@ -217,8 +248,8 @@ struct st_context
bool use_gs;
} pbo_upload;
- /** used for anything using util_draw_vertex_buffer */
- struct pipe_vertex_element velems_util_draw[3];
+ /** for drawing with st_util_vertex */
+ struct pipe_vertex_element util_velems[3];
void *passthrough_fs; /**< simple pass-through frag shader */
diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
index 10e294cd147..2de6620602d 100644
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -60,7 +60,7 @@
#include "util/u_inlines.h"
#include "util/u_format.h"
#include "util/u_prim.h"
-#include "util/u_draw_quad.h"
+#include "util/u_draw.h"
#include "util/u_upload_mgr.h"
#include "draw/draw_context.h"
#include "cso_cache/cso_context.h"
@@ -202,7 +202,7 @@ st_draw_vbo(struct gl_context *ctx,
/* Validate state. */
if (st->dirty.st || ctx->NewDriverState) {
- st_validate_state(st);
+ st_validate_state(st, ST_PIPELINE_RENDER);
#if 0
if (MESA_VERBOSE & VERBOSE_GLSL) {
@@ -315,7 +315,7 @@ st_indirect_draw_vbo(struct gl_context *ctx,
/* Validate state. */
if (st->dirty.st || ctx->NewDriverState) {
- st_validate_state(st);
+ st_validate_state(st, ST_PIPELINE_RENDER);
}
if (st->vertex_array_out_of_memory) {
@@ -398,3 +398,93 @@ st_destroy_draw(struct st_context *st)
{
draw_destroy(st->draw);
}
+
+
+/**
+ * Draw a quad with given position, texcoords and color.
+ */
+bool
+st_draw_quad(struct st_context *st,
+ float x0, float y0, float x1, float y1, float z,
+ float s0, float t0, float s1, float t1,
+ const float *color,
+ unsigned num_instances)
+{
+ struct pipe_vertex_buffer vb = {0};
+ struct st_util_vertex *verts;
+
+ vb.stride = sizeof(struct st_util_vertex);
+
+ u_upload_alloc(st->uploader, 0, 4 * sizeof(struct st_util_vertex), 4,
+ &vb.buffer_offset, &vb.buffer, (void **) &verts);
+ if (!vb.buffer) {
+ return false;
+ }
+
+ /* lower-left */
+ verts[0].x = x0;
+ verts[0].y = y1;
+ verts[0].z = z;
+ verts[0].r = color[0];
+ verts[0].g = color[1];
+ verts[0].b = color[2];
+ verts[0].a = color[3];
+ verts[0].s = s0;
+ verts[0].t = t0;
+
+ /* lower-right */
+ verts[1].x = x1;
+ verts[1].y = y1;
+ verts[1].z = z;
+ verts[1].r = color[0];
+ verts[1].g = color[1];
+ verts[1].b = color[2];
+ verts[1].a = color[3];
+ verts[1].s = s1;
+ verts[1].t = t0;
+
+ /* upper-right */
+ verts[2].x = x1;
+ verts[2].y = y0;
+ verts[2].z = z;
+ verts[2].r = color[0];
+ verts[2].g = color[1];
+ verts[2].b = color[2];
+ verts[2].a = color[3];
+ verts[2].s = s1;
+ verts[2].t = t1;
+
+ /* upper-left */
+ verts[3].x = x0;
+ verts[3].y = y0;
+ verts[3].z = z;
+ verts[3].r = color[0];
+ verts[3].g = color[1];
+ verts[3].b = color[2];
+ verts[3].a = color[3];
+ verts[3].s = s0;
+ verts[3].t = t1;
+
+ u_upload_unmap(st->uploader);
+
+ /* At the time of writing, cso_get_aux_vertex_buffer_slot() always returns
+ * zero. If that ever changes we need to audit the calls to that function
+ * and make sure the slot number is used consistently everywhere.
+ */
+ assert(cso_get_aux_vertex_buffer_slot(st->cso_context) == 0);
+
+ cso_set_vertex_buffers(st->cso_context,
+ cso_get_aux_vertex_buffer_slot(st->cso_context),
+ 1, &vb);
+
+ if (num_instances > 1) {
+ cso_draw_arrays_instanced(st->cso_context, PIPE_PRIM_TRIANGLE_FAN, 0, 4,
+ 0, num_instances);
+ } else {
+ cso_draw_arrays(st->cso_context, PIPE_PRIM_TRIANGLE_FAN, 0, 4);
+ }
+
+ pipe_resource_reference(&vb.buffer, NULL);
+
+ return true;
+}
diff --git a/src/mesa/state_tracker/st_draw.h b/src/mesa/state_tracker/st_draw.h
index a973c8a4a5d..d85c3b7facd 100644
--- a/src/mesa/state_tracker/st_draw.h
+++ b/src/mesa/state_tracker/st_draw.h
@@ -85,4 +85,11 @@ pointer_to_offset(const void *ptr)
}
+bool
+st_draw_quad(struct st_context *st,
+ float x0, float y0, float x1, float y1, float z,
+ float s0, float t0, float s1, float t1,
+ const float *color,
+ unsigned num_instances);
+
#endif
diff --git a/src/mesa/state_tracker/st_draw_feedback.c b/src/mesa/state_tracker/st_draw_feedback.c
index b6e6dea5b27..9f48945d74d 100644
--- a/src/mesa/state_tracker/st_draw_feedback.c
+++ b/src/mesa/state_tracker/st_draw_feedback.c
@@ -140,7 +140,7 @@ st_feedback_draw_vbo(struct gl_context *ctx,
st_flush_bitmap_cache(st);
- st_validate_state(st);
+ st_validate_state(st, ST_PIPELINE_RENDER);
if (!index_bounds_valid)
vbo_get_minmax_indices(ctx, prims, ib, &min_index, &max_index, nr_prims);
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index feabe6290eb..24c64447f44 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -75,6 +75,7 @@ static int _clamp(int a, int min, int max)
void st_init_limits(struct pipe_screen *screen,
struct gl_constants *c, struct gl_extensions *extensions)
{
+ int supported_irs;
unsigned sh;
boolean can_ubo = TRUE;
@@ -174,9 +175,19 @@ void st_init_limits(struct pipe_screen *screen,
pc = &c->Program[MESA_SHADER_TESS_EVAL];
options = &c->ShaderCompilerOptions[MESA_SHADER_TESS_EVAL];
break;
+ case PIPE_SHADER_COMPUTE:
+ pc = &c->Program[MESA_SHADER_COMPUTE];
+ options = &c->ShaderCompilerOptions[MESA_SHADER_COMPUTE];
+
+ if (!screen->get_param(screen, PIPE_CAP_COMPUTE))
+ continue;
+ supported_irs =
+ screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_SUPPORTED_IRS);
+ if (!(supported_irs & (1 << PIPE_SHADER_IR_TGSI)))
+ continue;
+ break;
default:
- /* compute shader, etc. */
- continue;
+ assert(0);
}
pc->MaxTextureImageUnits =
@@ -223,6 +234,9 @@ void st_init_limits(struct pipe_screen *screen,
screen, sh, PIPE_SHADER_CAP_MAX_SHADER_BUFFERS) / 2;
pc->MaxShaderStorageBlocks = pc->MaxAtomicBuffers;
+ pc->MaxImageUniforms = screen->get_shader_param(
+ screen, sh, PIPE_SHADER_CAP_MAX_SHADER_IMAGES);
+
/* Gallium doesn't really care about local vs. env parameters so use the
* same limits.
*/
@@ -261,6 +275,9 @@ void st_init_limits(struct pipe_screen *screen,
options->LowerClipDistance = true;
options->LowerBufferInterfaceBlocks = true;
+
+ if (sh == PIPE_SHADER_COMPUTE)
+ options->LowerShaderSharedVariables = true;
}
c->LowerTessLevel = true;
@@ -270,7 +287,8 @@ void st_init_limits(struct pipe_screen *screen,
c->Program[MESA_SHADER_TESS_CTRL].MaxTextureImageUnits +
c->Program[MESA_SHADER_TESS_EVAL].MaxTextureImageUnits +
c->Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits +
- c->Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits,
+ c->Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits +
+ c->Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits,
MAX_COMBINED_TEXTURE_IMAGE_UNITS);
/* This depends on program constants. */
@@ -330,7 +348,8 @@ void st_init_limits(struct pipe_screen *screen,
c->Program[MESA_SHADER_TESS_CTRL].MaxUniformBlocks +
c->Program[MESA_SHADER_TESS_EVAL].MaxUniformBlocks +
c->Program[MESA_SHADER_GEOMETRY].MaxUniformBlocks +
- c->Program[MESA_SHADER_FRAGMENT].MaxUniformBlocks;
+ c->Program[MESA_SHADER_FRAGMENT].MaxUniformBlocks +
+ c->Program[MESA_SHADER_COMPUTE].MaxUniformBlocks;
assert(c->MaxCombinedUniformBlocks <= MAX_COMBINED_UNIFORM_BUFFERS);
}
@@ -363,6 +382,21 @@ void st_init_limits(struct pipe_screen *screen,
c->MaxShaderStorageBlockSize = 1 << 27;
extensions->ARB_shader_storage_buffer_object = GL_TRUE;
}
+
+ c->MaxCombinedImageUniforms =
+ c->Program[MESA_SHADER_VERTEX].MaxImageUniforms +
+ c->Program[MESA_SHADER_TESS_CTRL].MaxImageUniforms +
+ c->Program[MESA_SHADER_TESS_EVAL].MaxImageUniforms +
+ c->Program[MESA_SHADER_GEOMETRY].MaxImageUniforms +
+ c->Program[MESA_SHADER_FRAGMENT].MaxImageUniforms +
+ c->Program[MESA_SHADER_COMPUTE].MaxImageUniforms;
+ c->MaxCombinedShaderOutputResources += c->MaxCombinedImageUniforms;
+ c->MaxImageUnits = MAX_IMAGE_UNITS;
+ c->MaxImageSamples = 0; /* XXX */
+ if (c->MaxCombinedImageUniforms) {
+ extensions->ARB_shader_image_load_store = GL_TRUE;
+ extensions->ARB_shader_image_size = GL_TRUE;
+ }
}
@@ -1012,4 +1046,31 @@ void st_init_extensions(struct pipe_screen *screen,
if ((ST_DEBUG & DEBUG_GREMEDY) &&
screen->get_param(screen, PIPE_CAP_STRING_MARKER))
extensions->GREMEDY_string_marker = GL_TRUE;
+
+ if (screen->get_param(screen, PIPE_CAP_COMPUTE)) {
+ int compute_supported_irs =
+ screen->get_shader_param(screen, PIPE_SHADER_COMPUTE,
+ PIPE_SHADER_CAP_SUPPORTED_IRS);
+ if (compute_supported_irs & (1 << PIPE_SHADER_IR_TGSI)) {
+ uint64_t grid_size[3], block_size[3];
+
+ screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_GRID_SIZE,
+ grid_size);
+ screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE,
+ block_size);
+ screen->get_compute_param(screen,
+ PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK,
+ &consts->MaxComputeWorkGroupInvocations);
+ screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE,
+ &consts->MaxComputeSharedMemorySize);
+
+ for (i = 0; i < 3; i++) {
+ consts->MaxComputeWorkGroupCount[i] = grid_size[i];
+ consts->MaxComputeWorkGroupSize[i] = block_size[i];
+ }
+ /* XXX: ARB_compute_shader is not enabled by default because images
+ * support is still not implemented yet. */
+ /* extensions->ARB_compute_shader = true; */
+ }
+ }
}
diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c
index 2b92bade440..82bf3a185ad 100644
--- a/src/mesa/state_tracker/st_format.c
+++ b/src/mesa/state_tracker/st_format.c
@@ -1484,6 +1484,48 @@ static const struct format_mapping format_map[] = {
{ PIPE_FORMAT_ETC1_RGB8, 0 }
},
+ /* ETC2 */
+ {
+ { GL_COMPRESSED_RGB8_ETC2, 0 },
+ { PIPE_FORMAT_ETC2_RGB8, 0 }
+ },
+ {
+ { GL_COMPRESSED_SRGB8_ETC2, 0 },
+ { PIPE_FORMAT_ETC2_SRGB8, 0 }
+ },
+ {
+ { GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2, 0 },
+ { PIPE_FORMAT_ETC2_RGB8A1, 0 }
+ },
+ {
+ { GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2, 0 },
+ { PIPE_FORMAT_ETC2_SRGB8A1, 0 }
+ },
+ {
+ { GL_COMPRESSED_RGBA8_ETC2_EAC, 0 },
+ { PIPE_FORMAT_ETC2_RGBA8, 0 }
+ },
+ {
+ { GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC, 0 },
+ { PIPE_FORMAT_ETC2_SRGBA8, 0 }
+ },
+ {
+ { GL_COMPRESSED_R11_EAC, 0 },
+ { PIPE_FORMAT_ETC2_R11_UNORM, 0 }
+ },
+ {
+ { GL_COMPRESSED_SIGNED_R11_EAC, 0 },
+ { PIPE_FORMAT_ETC2_R11_SNORM, 0 }
+ },
+ {
+ { GL_COMPRESSED_RG11_EAC, 0 },
+ { PIPE_FORMAT_ETC2_RG11_UNORM, 0 }
+ },
+ {
+ { GL_COMPRESSED_SIGNED_RG11_EAC, 0 },
+ { PIPE_FORMAT_ETC2_RG11_SNORM, 0 }
+ },
+
/* BPTC */
{
{ GL_COMPRESSED_RGBA_BPTC_UNORM, 0 },
diff --git a/src/mesa/state_tracker/st_format.h b/src/mesa/state_tracker/st_format.h
index 90e00e8ebf6..3e10aa64bc6 100644
--- a/src/mesa/state_tracker/st_format.h
+++ b/src/mesa/state_tracker/st_format.h
@@ -36,6 +36,10 @@
#include "pipe/p_defines.h"
#include "pipe/p_format.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
struct gl_context;
struct pipe_screen;
@@ -77,4 +81,8 @@ st_translate_color(const union gl_color_union *colorIn,
union pipe_color_union *colorOut,
GLenum baseFormat, GLboolean is_integer);
+#ifdef __cplusplus
+}
+#endif
+
#endif /* ST_FORMAT_H */
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 2ad91ecf4df..943582d447a 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -40,6 +40,7 @@
#include "main/shaderobj.h"
#include "main/uniforms.h"
#include "main/shaderapi.h"
+#include "main/shaderimage.h"
#include "program/prog_instruction.h"
#include "pipe/p_context.h"
@@ -50,6 +51,7 @@
#include "util/u_memory.h"
#include "st_program.h"
#include "st_mesa_to_tgsi.h"
+#include "st_format.h"
#define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) | \
@@ -262,6 +264,7 @@ public:
int tex_target; /**< One of TEXTURE_*_INDEX */
glsl_base_type tex_type;
GLboolean tex_shadow;
+ unsigned image_format;
st_src_reg tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
unsigned tex_offset_num_offset;
@@ -395,6 +398,9 @@ public:
glsl_base_type sampler_types[PIPE_MAX_SAMPLERS];
int sampler_targets[PIPE_MAX_SAMPLERS]; /**< One of TGSI_TEXTURE_* */
int buffers_used;
+ int images_used;
+ int image_targets[PIPE_MAX_SHADER_IMAGES];
+ unsigned image_formats[PIPE_MAX_SHADER_IMAGES];
bool indirect_addr_consts;
int wpos_transform_const;
@@ -402,6 +408,7 @@ public:
bool native_integers;
bool have_sqrt;
bool have_fma;
+ bool use_shared_memory;
variable_storage *find_variable_storage(ir_variable *var);
@@ -451,6 +458,8 @@ public:
void visit_atomic_counter_intrinsic(ir_call *);
void visit_ssbo_intrinsic(ir_call *);
void visit_membar_intrinsic(ir_call *);
+ void visit_shared_intrinsic(ir_call *);
+ void visit_image_intrinsic(ir_call *);
st_src_reg result;
@@ -1214,6 +1223,7 @@ attrib_type_size(const struct glsl_type *type, bool is_vs_input)
case GLSL_TYPE_INTERFACE:
case GLSL_TYPE_VOID:
case GLSL_TYPE_ERROR:
+ case GLSL_TYPE_FUNCTION:
assert(!"Invalid type in type_size");
break;
}
@@ -1969,6 +1979,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
case ir_unop_u2i:
/* Converting between signed and unsigned integers is a no-op. */
result_src = op[0];
+ result_src.type = result_dst.type;
break;
case ir_unop_b2i:
if (native_integers) {
@@ -3341,6 +3352,239 @@ glsl_to_tgsi_visitor::visit_membar_intrinsic(ir_call *ir)
}
void
+glsl_to_tgsi_visitor::visit_shared_intrinsic(ir_call *ir)
+{
+ const char *callee = ir->callee->function_name();
+ exec_node *param = ir->actual_parameters.get_head();
+
+ ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+
+ st_src_reg buffer(PROGRAM_MEMORY, 0, GLSL_TYPE_UINT);
+
+ /* Calculate the surface offset */
+ offset->accept(this);
+ st_src_reg off = this->result;
+
+ st_dst_reg dst = undef_dst;
+ if (ir->return_deref) {
+ ir->return_deref->accept(this);
+ dst = st_dst_reg(this->result);
+ dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1;
+ }
+
+ glsl_to_tgsi_instruction *inst;
+
+ if (!strcmp("__intrinsic_load_shared", callee)) {
+ inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off);
+ inst->buffer = buffer;
+ } else if (!strcmp("__intrinsic_store_shared", callee)) {
+ param = param->get_next();
+ ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
+ val->accept(this);
+
+ param = param->get_next();
+ ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
+ assert(write_mask);
+ dst.writemask = write_mask->value.u[0];
+
+ dst.type = this->result.type;
+ inst = emit_asm(ir, TGSI_OPCODE_STORE, dst, off, this->result);
+ inst->buffer = buffer;
+ } else {
+ param = param->get_next();
+ ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
+ val->accept(this);
+
+ st_src_reg data = this->result, data2 = undef_src;
+ unsigned opcode;
+ if (!strcmp("__intrinsic_atomic_add_shared", callee))
+ opcode = TGSI_OPCODE_ATOMUADD;
+ else if (!strcmp("__intrinsic_atomic_min_shared", callee))
+ opcode = TGSI_OPCODE_ATOMIMIN;
+ else if (!strcmp("__intrinsic_atomic_max_shared", callee))
+ opcode = TGSI_OPCODE_ATOMIMAX;
+ else if (!strcmp("__intrinsic_atomic_and_shared", callee))
+ opcode = TGSI_OPCODE_ATOMAND;
+ else if (!strcmp("__intrinsic_atomic_or_shared", callee))
+ opcode = TGSI_OPCODE_ATOMOR;
+ else if (!strcmp("__intrinsic_atomic_xor_shared", callee))
+ opcode = TGSI_OPCODE_ATOMXOR;
+ else if (!strcmp("__intrinsic_atomic_exchange_shared", callee))
+ opcode = TGSI_OPCODE_ATOMXCHG;
+ else if (!strcmp("__intrinsic_atomic_comp_swap_shared", callee)) {
+ opcode = TGSI_OPCODE_ATOMCAS;
+ param = param->get_next();
+ val = ((ir_instruction *)param)->as_rvalue();
+ val->accept(this);
+ data2 = this->result;
+ } else {
+ assert(!"Unexpected intrinsic");
+ return;
+ }
+
+ inst = emit_asm(ir, opcode, dst, off, data, data2);
+ inst->buffer = buffer;
+ }
+}
+
+void
+glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir)
+{
+ const char *callee = ir->callee->function_name();
+ exec_node *param = ir->actual_parameters.get_head();
+
+ ir_dereference *img = (ir_dereference *)param;
+ const ir_variable *imgvar = img->variable_referenced();
+ const glsl_type *type = imgvar->type->without_array();
+ unsigned sampler_array_size = 1, sampler_base = 0;
+
+ st_src_reg reladdr;
+ st_src_reg image(PROGRAM_IMAGE, 0, GLSL_TYPE_UINT);
+
+ get_deref_offsets(img, &sampler_array_size, &sampler_base,
+ (unsigned int *)&image.index, &reladdr);
+ if (reladdr.file != PROGRAM_UNDEFINED) {
+ emit_arl(ir, sampler_reladdr, reladdr);
+ image.reladdr = ralloc(mem_ctx, st_src_reg);
+ memcpy(image.reladdr, &sampler_reladdr, sizeof(reladdr));
+ }
+
+ st_dst_reg dst = undef_dst;
+ if (ir->return_deref) {
+ ir->return_deref->accept(this);
+ dst = st_dst_reg(this->result);
+ dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1;
+ }
+
+ glsl_to_tgsi_instruction *inst;
+
+ if (!strcmp("__intrinsic_image_size", callee)) {
+ dst.writemask = WRITEMASK_XYZ;
+ inst = emit_asm(ir, TGSI_OPCODE_RESQ, dst);
+ } else if (!strcmp("__intrinsic_image_samples", callee)) {
+ st_src_reg res = get_temp(glsl_type::ivec4_type);
+ st_dst_reg dstres = st_dst_reg(res);
+ dstres.writemask = WRITEMASK_W;
+ emit_asm(ir, TGSI_OPCODE_RESQ, dstres);
+ res.swizzle = SWIZZLE_WWWW;
+ inst = emit_asm(ir, TGSI_OPCODE_MOV, dst, res);
+ } else {
+ st_src_reg arg1 = undef_src, arg2 = undef_src;
+ st_src_reg coord;
+ st_dst_reg coord_dst;
+ coord = get_temp(glsl_type::ivec4_type);
+ coord_dst = st_dst_reg(coord);
+ coord_dst.writemask = (1 << type->coordinate_components()) - 1;
+ param = param->get_next();
+ ((ir_dereference *)param)->accept(this);
+ emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
+ coord.swizzle = SWIZZLE_XXXX;
+ switch (type->coordinate_components()) {
+ case 4: assert(!"unexpected coord count");
+ /* fallthrough */
+ case 3: coord.swizzle |= SWIZZLE_Z << 6;
+ /* fallthrough */
+ case 2: coord.swizzle |= SWIZZLE_Y << 3;
+ }
+
+ if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) {
+ param = param->get_next();
+ ((ir_dereference *)param)->accept(this);
+ st_src_reg sample = this->result;
+ sample.swizzle = SWIZZLE_XXXX;
+ coord_dst.writemask = WRITEMASK_W;
+ emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, sample);
+ coord.swizzle |= SWIZZLE_W << 9;
+ }
+
+ param = param->get_next();
+ if (!param->is_tail_sentinel()) {
+ ((ir_dereference *)param)->accept(this);
+ arg1 = this->result;
+ param = param->get_next();
+ }
+
+ if (!param->is_tail_sentinel()) {
+ ((ir_dereference *)param)->accept(this);
+ arg2 = this->result;
+ param = param->get_next();
+ }
+
+ assert(param->is_tail_sentinel());
+
+ unsigned opcode;
+ if (!strcmp("__intrinsic_image_load", callee))
+ opcode = TGSI_OPCODE_LOAD;
+ else if (!strcmp("__intrinsic_image_store", callee))
+ opcode = TGSI_OPCODE_STORE;
+ else if (!strcmp("__intrinsic_image_atomic_add", callee))
+ opcode = TGSI_OPCODE_ATOMUADD;
+ else if (!strcmp("__intrinsic_image_atomic_min", callee))
+ opcode = TGSI_OPCODE_ATOMIMIN;
+ else if (!strcmp("__intrinsic_image_atomic_max", callee))
+ opcode = TGSI_OPCODE_ATOMIMAX;
+ else if (!strcmp("__intrinsic_image_atomic_and", callee))
+ opcode = TGSI_OPCODE_ATOMAND;
+ else if (!strcmp("__intrinsic_image_atomic_or", callee))
+ opcode = TGSI_OPCODE_ATOMOR;
+ else if (!strcmp("__intrinsic_image_atomic_xor", callee))
+ opcode = TGSI_OPCODE_ATOMXOR;
+ else if (!strcmp("__intrinsic_image_atomic_exchange", callee))
+ opcode = TGSI_OPCODE_ATOMXCHG;
+ else if (!strcmp("__intrinsic_image_atomic_comp_swap", callee))
+ opcode = TGSI_OPCODE_ATOMCAS;
+ else {
+ assert(!"Unexpected intrinsic");
+ return;
+ }
+
+ inst = emit_asm(ir, opcode, dst, coord, arg1, arg2);
+ if (opcode == TGSI_OPCODE_STORE)
+ inst->dst[0].writemask = WRITEMASK_XYZW;
+ }
+
+ inst->buffer = image;
+ inst->sampler_array_size = sampler_array_size;
+ inst->sampler_base = sampler_base;
+
+ switch (type->sampler_dimensionality) {
+ case GLSL_SAMPLER_DIM_1D:
+ inst->tex_target = (type->sampler_array)
+ ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
+ break;
+ case GLSL_SAMPLER_DIM_2D:
+ inst->tex_target = (type->sampler_array)
+ ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
+ break;
+ case GLSL_SAMPLER_DIM_3D:
+ inst->tex_target = TEXTURE_3D_INDEX;
+ break;
+ case GLSL_SAMPLER_DIM_CUBE:
+ inst->tex_target = (type->sampler_array)
+ ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX;
+ break;
+ case GLSL_SAMPLER_DIM_RECT:
+ inst->tex_target = TEXTURE_RECT_INDEX;
+ break;
+ case GLSL_SAMPLER_DIM_BUF:
+ inst->tex_target = TEXTURE_BUFFER_INDEX;
+ break;
+ case GLSL_SAMPLER_DIM_EXTERNAL:
+ inst->tex_target = TEXTURE_EXTERNAL_INDEX;
+ break;
+ case GLSL_SAMPLER_DIM_MS:
+ inst->tex_target = (type->sampler_array)
+ ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX;
+ break;
+ default:
+ assert(!"Should not get here.");
+ }
+
+ inst->image_format = st_mesa_format_to_pipe_format(st_context(ctx),
+ _mesa_get_shader_image_format(imgvar->data.image_format));
+}
+
+void
glsl_to_tgsi_visitor::visit(ir_call *ir)
{
glsl_to_tgsi_instruction *call_inst;
@@ -3381,6 +3625,36 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
return;
}
+ if (!strcmp("__intrinsic_load_shared", callee) ||
+ !strcmp("__intrinsic_store_shared", callee) ||
+ !strcmp("__intrinsic_atomic_add_shared", callee) ||
+ !strcmp("__intrinsic_atomic_min_shared", callee) ||
+ !strcmp("__intrinsic_atomic_max_shared", callee) ||
+ !strcmp("__intrinsic_atomic_and_shared", callee) ||
+ !strcmp("__intrinsic_atomic_or_shared", callee) ||
+ !strcmp("__intrinsic_atomic_xor_shared", callee) ||
+ !strcmp("__intrinsic_atomic_exchange_shared", callee) ||
+ !strcmp("__intrinsic_atomic_comp_swap_shared", callee)) {
+ visit_shared_intrinsic(ir);
+ return;
+ }
+
+ if (!strcmp("__intrinsic_image_load", callee) ||
+ !strcmp("__intrinsic_image_store", callee) ||
+ !strcmp("__intrinsic_image_atomic_add", callee) ||
+ !strcmp("__intrinsic_image_atomic_min", callee) ||
+ !strcmp("__intrinsic_image_atomic_max", callee) ||
+ !strcmp("__intrinsic_image_atomic_and", callee) ||
+ !strcmp("__intrinsic_image_atomic_or", callee) ||
+ !strcmp("__intrinsic_image_atomic_xor", callee) ||
+ !strcmp("__intrinsic_image_atomic_exchange", callee) ||
+ !strcmp("__intrinsic_image_atomic_comp_swap", callee) ||
+ !strcmp("__intrinsic_image_size", callee) ||
+ !strcmp("__intrinsic_image_samples", callee)) {
+ visit_image_intrinsic(ir);
+ return;
+ }
+
entry = get_function_signature(sig);
/* Process in parameters. */
foreach_two_lists(formal_node, &sig->parameters,
@@ -3980,6 +4254,7 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
num_address_regs = 0;
samplers_used = 0;
buffers_used = 0;
+ images_used = 0;
indirect_addr_consts = false;
wpos_transform_const = -1;
glsl_version = 0;
@@ -3992,6 +4267,7 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
options = NULL;
have_sqrt = false;
have_fma = false;
+ use_shared_memory = false;
}
glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
@@ -4015,6 +4291,7 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
{
v->samplers_used = 0;
v->buffers_used = 0;
+ v->images_used = 0;
foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) {
if (inst->info->is_tex) {
@@ -4035,8 +4312,20 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
if (inst->buffer.file != PROGRAM_UNDEFINED && (
is_resource_instruction(inst->op) ||
inst->op == TGSI_OPCODE_STORE)) {
- if (inst->buffer.file == PROGRAM_BUFFER)
+ if (inst->buffer.file == PROGRAM_BUFFER) {
v->buffers_used |= 1 << inst->buffer.index;
+ } else if (inst->buffer.file == PROGRAM_MEMORY) {
+ v->use_shared_memory = true;
+ } else {
+ assert(inst->buffer.file == PROGRAM_IMAGE);
+ for (int i = 0; i < inst->sampler_array_size; i++) {
+ unsigned idx = inst->sampler_base + i;
+ v->images_used |= 1 << idx;
+ v->image_targets[idx] =
+ st_translate_texture_target(inst->tex_target, false);
+ v->image_formats[idx] = inst->image_format;
+ }
+ }
}
}
prog->SamplersUsed = v->samplers_used;
@@ -4819,7 +5108,9 @@ struct st_translate {
struct ureg_dst address[3];
struct ureg_src samplers[PIPE_MAX_SAMPLERS];
struct ureg_src buffers[PIPE_MAX_SHADER_BUFFERS];
+ struct ureg_src images[PIPE_MAX_SHADER_IMAGES];
struct ureg_src systemValues[SYSTEM_VALUE_MAX];
+ struct ureg_src shared_memory;
struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
unsigned *array_sizes;
struct array_decl *input_arrays;
@@ -4880,6 +5171,12 @@ const unsigned _mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
TGSI_SEMANTIC_PRIMID,
TGSI_SEMANTIC_TESSOUTER,
TGSI_SEMANTIC_TESSINNER,
+
+ /* Compute shaders
+ */
+ TGSI_SEMANTIC_THREAD_ID,
+ TGSI_SEMANTIC_BLOCK_ID,
+ TGSI_SEMANTIC_GRID_SIZE,
};
/**
@@ -5308,7 +5605,12 @@ compile_tgsi_instruction(struct st_translate *t,
for (i = num_src - 1; i >= 0; i--)
src[i + 1] = src[i];
num_src++;
- src[0] = t->buffers[inst->buffer.index];
+ if (inst->buffer.file == PROGRAM_MEMORY)
+ src[0] = t->shared_memory;
+ else if (inst->buffer.file == PROGRAM_BUFFER)
+ src[0] = t->buffers[inst->buffer.index];
+ else
+ src[0] = t->images[inst->buffer.index];
if (inst->buffer.reladdr)
src[0] = ureg_src_indirect(src[0], ureg_src(t->address[2]));
assert(src[0].File != TGSI_FILE_NULL);
@@ -5317,7 +5619,13 @@ compile_tgsi_instruction(struct st_translate *t,
break;
case TGSI_OPCODE_STORE:
- dst[0] = ureg_writemask(ureg_dst(t->buffers[inst->buffer.index]), inst->dst[0].writemask);
+ if (inst->buffer.file == PROGRAM_MEMORY)
+ dst[0] = ureg_dst(t->shared_memory);
+ else if (inst->buffer.file == PROGRAM_BUFFER)
+ dst[0] = ureg_dst(t->buffers[inst->buffer.index]);
+ else
+ dst[0] = ureg_dst(t->images[inst->buffer.index]);
+ dst[0] = ureg_writemask(dst[0], inst->dst[0].writemask);
if (inst->buffer.reladdr)
dst[0] = ureg_dst_indirect(dst[0], ureg_src(t->address[2]));
assert(dst[0].File != TGSI_FILE_NULL);
@@ -5643,6 +5951,12 @@ st_translate_program(
TGSI_SEMANTIC_TESSCOORD);
assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_HELPER_INVOCATION] ==
TGSI_SEMANTIC_HELPER_INVOCATION);
+ assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_LOCAL_INVOCATION_ID] ==
+ TGSI_SEMANTIC_THREAD_ID);
+ assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_WORK_GROUP_ID] ==
+ TGSI_SEMANTIC_BLOCK_ID);
+ assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_NUM_WORK_GROUPS] ==
+ TGSI_SEMANTIC_GRID_SIZE);
t = CALLOC_STRUCT(st_translate);
if (!t) {
@@ -5710,6 +6024,8 @@ st_translate_program(
t->inputs[i] = ureg_DECL_vs_input(ureg, i);
}
break;
+ case TGSI_PROCESSOR_COMPUTE:
+ break;
default:
assert(0);
}
@@ -5719,6 +6035,7 @@ st_translate_program(
*/
switch (procType) {
case TGSI_PROCESSOR_FRAGMENT:
+ case TGSI_PROCESSOR_COMPUTE:
break;
case TGSI_PROCESSOR_GEOMETRY:
case TGSI_PROCESSOR_TESS_EVAL:
@@ -5969,7 +6286,17 @@ st_translate_program(
}
}
+ if (program->use_shared_memory)
+ t->shared_memory = ureg_DECL_shared_memory(ureg);
+ for (i = 0; i < program->shader->NumImages; i++) {
+ if (program->images_used & (1 << i)) {
+ t->images[i] = ureg_DECL_image(ureg, i,
+ program->image_targets[i],
+ program->image_formats[i],
+ true, false);
+ }
+ }
/* Emit each instruction in turn:
*/
@@ -6188,6 +6515,7 @@ get_mesa_program(struct gl_context *ctx,
struct st_geometry_program *stgp;
struct st_tessctrl_program *sttcp;
struct st_tesseval_program *sttep;
+ struct st_compute_program *stcp;
switch (shader->Type) {
case GL_VERTEX_SHADER:
@@ -6210,6 +6538,10 @@ get_mesa_program(struct gl_context *ctx,
sttep = (struct st_tesseval_program *)prog;
sttep->glsl_to_tgsi = v;
break;
+ case GL_COMPUTE_SHADER:
+ stcp = (struct st_compute_program *)prog;
+ stcp->glsl_to_tgsi = v;
+ break;
default:
assert(!"should not be reached");
return NULL;
diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c
index 84b65369d80..a983d64b5d5 100644
--- a/src/mesa/state_tracker/st_manager.c
+++ b/src/mesa/state_tracker/st_manager.c
@@ -906,7 +906,6 @@ st_manager_add_color_renderbuffer(struct st_context *st,
break;
default:
return FALSE;
- break;
}
if (!st_framebuffer_add_renderbuffer(stfb, idx))
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index 5bd626f8610..2e21d02b8b5 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -158,6 +158,9 @@ delete_basic_variant(struct st_context *st, struct st_basic_variant *v,
case GL_GEOMETRY_PROGRAM_NV:
cso_delete_geometry_shader(st->cso_context, v->driver_shader);
break;
+ case GL_COMPUTE_PROGRAM_NV:
+ cso_delete_compute_shader(st->cso_context, v->driver_shader);
+ break;
default:
assert(!"this shouldn't occur");
}
@@ -193,6 +196,30 @@ st_release_basic_variants(struct st_context *st, GLenum target,
/**
+ * Free all variants of a compute program.
+ *
+ * Walks the singly linked list headed by stcp->variants and passes every
+ * entry to delete_basic_variant() together with the program's target, then
+ * resets the list head to NULL.  If stcp->tgsi.prog is set, it is released
+ * with ureg_free_tokens() and the pointer is cleared.
+ *
+ * \param st    state tracker context forwarded to delete_basic_variant()
+ * \param stcp  compute program whose variant list and tokens are released
+ */
+void
+st_release_cp_variants(struct st_context *st, struct st_compute_program *stcp)
+{
+ struct st_basic_variant **variants = &stcp->variants;
+ struct st_basic_variant *v;
+
+ for (v = *variants; v; ) { /* no increment clause: v advances via 'next' below */
+ struct st_basic_variant *next = v->next; /* read the link before v is handed to delete_basic_variant() */
+ delete_basic_variant(st, v, stcp->Base.Base.Target);
+ v = next;
+ }
+
+ *variants = NULL; /* list is now empty */
+
+ if (stcp->tgsi.prog) {
+ ureg_free_tokens(stcp->tgsi.prog);
+ stcp->tgsi.prog = NULL; /* cleared so the freed tokens cannot be reached again through stcp */
+ }
+}
+
+
+/**
* Translate a vertex program.
*/
bool
@@ -1395,6 +1422,74 @@ st_translate_tesseval_program(struct st_context *st,
/**
+ * Translate a compute program to create a new variant.
+ *
+ * Creates a ureg program for TGSI_PROCESSOR_COMPUTE, runs
+ * st_translate_program_common() on the program's glsl_to_tgsi visitor and
+ * stores the resulting tokens in stcp->tgsi.prog.  The local memory
+ * requirement is taken from stcp->Base.SharedSize; the private and input
+ * memory requirements are set to 0.  Afterwards the visitor is freed and
+ * stcp->glsl_to_tgsi is reset to NULL.
+ *
+ * NOTE(review): 'ureg' is not released in this function; presumably
+ * st_translate_program_common() takes care of it -- confirm.
+ *
+ * \param st    state tracker context; st->pipe->screen is used to create ureg
+ * \param stcp  compute program to translate
+ * \return false if the ureg program could not be created, true otherwise
+ */
+bool
+st_translate_compute_program(struct st_context *st,
+ struct st_compute_program *stcp)
+{
+ struct ureg_program *ureg;
+ struct pipe_shader_state prog; /* filled through the &prog argument below; only prog.tokens is read here */
+
+ ureg = ureg_create_with_screen(TGSI_PROCESSOR_COMPUTE, st->pipe->screen);
+ if (ureg == NULL)
+ return false; /* nothing has been modified on stcp at this point */
+
+ st_translate_program_common(st, &stcp->Base.Base, stcp->glsl_to_tgsi, ureg,
+ TGSI_PROCESSOR_COMPUTE, &prog);
+
+ stcp->tgsi.prog = prog.tokens;
+ stcp->tgsi.req_local_mem = stcp->Base.SharedSize;
+ stcp->tgsi.req_private_mem = 0;
+ stcp->tgsi.req_input_mem = 0;
+
+ free_glsl_to_tgsi_visitor(stcp->glsl_to_tgsi); /* visitor is not kept once the tokens are stored */
+ stcp->glsl_to_tgsi = NULL;
+ return true;
+}
+
+
+/**
+ * Get/create compute program variant.
+ *
+ * Builds a variant key whose only explicitly set field is 'st' (NULL when
+ * st->has_shareable_shaders is set, otherwise the given context) and
+ * searches the list at *variants for an entry with an identical key.  If
+ * none is found, a new st_basic_variant is allocated, its driver shader is
+ * created with pipe->create_compute_state(), and it is inserted at the
+ * head of the list.
+ *
+ * NOTE(review): the value returned by create_compute_state() is stored
+ * without being checked here.
+ *
+ * \param st        state tracker context
+ * \param tgsi      compute state passed to pipe->create_compute_state()
+ * \param variants  address of the variant list head; updated on insertion
+ * \return the matching or newly created variant, or NULL if allocating a
+ *         new variant failed
+ */
+struct st_basic_variant *
+st_get_cp_variant(struct st_context *st,
+ struct pipe_compute_state *tgsi,
+ struct st_basic_variant **variants)
+{
+ struct pipe_context *pipe = st->pipe;
+ struct st_basic_variant *v;
+ struct st_basic_variant_key key;
+
+ memset(&key, 0, sizeof(key)); /* whole key is zeroed before the memcmp() comparison below */
+ key.st = st->has_shareable_shaders ? NULL : st;
+
+ /* Search for existing variant */
+ for (v = *variants; v; v = v->next) {
+ if (memcmp(&v->key, &key, sizeof(key)) == 0) {
+ break; /* v is left pointing at the match */
+ }
+ }
+
+ if (!v) { /* loop ran off the end of the list: no match */
+ /* create new */
+ v = CALLOC_STRUCT(st_basic_variant);
+ if (v) {
+ /* fill in new variant */
+ v->driver_shader = pipe->create_compute_state(pipe, tgsi);
+ v->key = key;
+
+ /* insert into list */
+ v->next = *variants;
+ *variants = v;
+ }
+ }
+
+ return v;
+}
+
+
+/**
* Vert/Geom/Frag programs have per-context variants. Free all the
* variants attached to the given program which match the given context.
*/
@@ -1449,14 +1544,17 @@ destroy_program_variants(struct st_context *st, struct gl_program *target)
case GL_GEOMETRY_PROGRAM_NV:
case GL_TESS_CONTROL_PROGRAM_NV:
case GL_TESS_EVALUATION_PROGRAM_NV:
+ case GL_COMPUTE_PROGRAM_NV:
{
struct st_geometry_program *gp = (struct st_geometry_program*)target;
struct st_tessctrl_program *tcp = (struct st_tessctrl_program*)target;
struct st_tesseval_program *tep = (struct st_tesseval_program*)target;
+ struct st_compute_program *cp = (struct st_compute_program*)target;
struct st_basic_variant **variants =
target->Target == GL_GEOMETRY_PROGRAM_NV ? &gp->variants :
target->Target == GL_TESS_CONTROL_PROGRAM_NV ? &tcp->variants :
target->Target == GL_TESS_EVALUATION_PROGRAM_NV ? &tep->variants :
+ target->Target == GL_COMPUTE_PROGRAM_NV ? &cp->variants :
NULL;
struct st_basic_variant *v, **prevPtr = variants;
@@ -1513,6 +1611,7 @@ destroy_shader_program_variants_cb(GLuint key, void *data, void *userData)
case GL_GEOMETRY_SHADER:
case GL_TESS_CONTROL_SHADER:
case GL_TESS_EVALUATION_SHADER:
+ case GL_COMPUTE_SHADER:
{
destroy_program_variants(st, shader->Program);
}
@@ -1629,6 +1728,12 @@ st_precompile_shader_variant(struct st_context *st,
break;
}
+ case GL_COMPUTE_PROGRAM_NV: {
+ struct st_compute_program *p = (struct st_compute_program *)prog;
+ st_get_cp_variant(st, &p->tgsi, &p->variants);
+ break;
+ }
+
default:
assert(0);
}
diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h
index 74f3def6095..028fba99a74 100644
--- a/src/mesa/state_tracker/st_program.h
+++ b/src/mesa/state_tracker/st_program.h
@@ -231,6 +231,18 @@ struct st_tesseval_program
};
+/**
+ * Derived from Mesa gl_compute_program:
+ *
+ * Base is the first member, and the helpers in this header cast between
+ * gl_compute_program / gl_program pointers and this struct.
+ */
+struct st_compute_program
+{
+ struct gl_compute_program Base; /**< The Mesa compute program */
+ struct pipe_compute_state tgsi; /**< Compute state; filled in by st_translate_compute_program() */
+ struct glsl_to_tgsi_visitor* glsl_to_tgsi; /**< Visitor; freed and set to NULL by st_translate_compute_program() */
+
+ struct st_basic_variant *variants; /**< Head of the variant list, linked through st_basic_variant::next */
+};
+
static inline struct st_fragment_program *
st_fragment_program( struct gl_fragment_program *fp )
@@ -263,6 +275,12 @@ st_tesseval_program( struct gl_tess_eval_program *tep )
return (struct st_tesseval_program *)tep;
}
+static inline struct st_compute_program *
+st_compute_program( struct gl_compute_program *cp )
+{
+ return (struct st_compute_program *)cp; /* plain pointer cast; assumes cp is the Base member of an st_compute_program -- not checked here */
+}
+
static inline void
st_reference_vertprog(struct st_context *st,
struct st_vertex_program **ptr,
@@ -313,6 +331,16 @@ st_reference_tesseprog(struct st_context *st,
(struct gl_program *) prog);
}
+static inline void
+st_reference_compprog(struct st_context *st,
+ struct st_compute_program **ptr,
+ struct st_compute_program *prog)
+{
+ _mesa_reference_program(st->ctx, /* thin wrapper: forwards to the generic gl_program reference helper */
+ (struct gl_program **) ptr, /* both pointers are cast to the gl_program base type */
+ (struct gl_program *) prog);
+}
+
/**
* This defines mapping from Mesa VARYING_SLOTs to TGSI GENERIC slots.
*/
@@ -351,6 +379,11 @@ st_get_fp_variant(struct st_context *st,
const struct st_fp_variant_key *key);
extern struct st_basic_variant *
+st_get_cp_variant(struct st_context *st,
+ struct pipe_compute_state *tgsi,
+ struct st_basic_variant **variants);
+
+extern struct st_basic_variant *
st_get_basic_variant(struct st_context *st,
unsigned pipe_shader,
struct pipe_shader_state *tgsi,
@@ -365,6 +398,10 @@ st_release_fp_variants( struct st_context *st,
struct st_fragment_program *stfp );
extern void
+st_release_cp_variants(struct st_context *st,
+ struct st_compute_program *stcp);
+
+extern void
st_release_basic_variants(struct st_context *st, GLenum target,
struct st_basic_variant **variants,
struct pipe_shader_state *tgsi);
@@ -392,6 +429,10 @@ extern bool
st_translate_tesseval_program(struct st_context *st,
struct st_tesseval_program *sttep);
+extern bool
+st_translate_compute_program(struct st_context *st,
+ struct st_compute_program *stcp);
+
extern void
st_print_current_vertex_program(void);
diff --git a/src/util/hash_table.h b/src/util/hash_table.h
index 85b013cac24..c69abfa3e64 100644
--- a/src/util/hash_table.h
+++ b/src/util/hash_table.h
@@ -108,7 +108,9 @@ static inline uint32_t _mesa_hash_pointer(const void *pointer)
return _mesa_hash_data(&pointer, sizeof(pointer));
}
-static const uint32_t _mesa_fnv32_1a_offset_bias = 2166136261u;
+enum {
+ _mesa_fnv32_1a_offset_bias = 2166136261u,
+};
static inline uint32_t
_mesa_fnv32_1a_accumulate_block(uint32_t hash, const void *data, size_t size)
diff --git a/src/util/u_atomic.h b/src/util/u_atomic.h
index e38395ac633..867590391f4 100644
--- a/src/util/u_atomic.h
+++ b/src/util/u_atomic.h
@@ -88,65 +88,6 @@
#include <intrin.h>
#include <assert.h>
-#if _MSC_VER < 1600
-
-/* Implement _InterlockedCompareExchange8 in terms of _InterlockedCompareExchange16 */
-static __inline char
-_InterlockedCompareExchange8(char volatile *destination8, char exchange8, char comparand8)
-{
- INT_PTR destinationAddr = (INT_PTR)destination8;
- short volatile *destination16 = (short volatile *)(destinationAddr & ~1);
- const short shift8 = (destinationAddr & 1) * 8;
- const short mask8 = 0xff << shift8;
- short initial16 = *destination16;
- char initial8 = initial16 >> shift8;
- while (initial8 == comparand8) {
- /* initial *destination8 matches, so try exchange it while keeping the
- * neighboring byte untouched */
- short exchange16 = (initial16 & ~mask8) | ((short)exchange8 << shift8);
- short comparand16 = initial16;
- short initial16 = _InterlockedCompareExchange16(destination16, exchange16, comparand16);
- if (initial16 == comparand16) {
- /* succeeded */
- return comparand8;
- }
- /* something changed, retry with the new initial value */
- initial8 = initial16 >> shift8;
- }
- return initial8;
-}
-
-/* Implement _InterlockedExchangeAdd16 in terms of _InterlockedCompareExchange16 */
-static __inline short
-_InterlockedExchangeAdd16(short volatile *addend, short value)
-{
- short initial = *addend;
- short comparand;
- do {
- short exchange = initial + value;
- comparand = initial;
- /* if *addend==comparand then *addend=exchange, return original *addend */
- initial = _InterlockedCompareExchange16(addend, exchange, comparand);
- } while(initial != comparand);
- return comparand;
-}
-
-/* Implement _InterlockedExchangeAdd8 in terms of _InterlockedCompareExchange8 */
-static __inline char
-_InterlockedExchangeAdd8(char volatile *addend, char value)
-{
- char initial = *addend;
- char comparand;
- do {
- char exchange = initial + value;
- comparand = initial;
- initial = _InterlockedCompareExchange8(addend, exchange, comparand);
- } while(initial != comparand);
- return comparand;
-}
-
-#endif /* _MSC_VER < 1600 */
-
/* MSVC supports decltype keyword, but it's only supported on C++ and doesn't
* quite work here; and if a C++-only solution is worthwhile, then it would be
* better to use templates / function overloading, instead of decltype magic.