summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorJason Ekstrand <[email protected]>2015-10-19 11:15:32 -0700
committerJason Ekstrand <[email protected]>2015-10-19 14:14:21 -0700
commit958fc04dc51a2561c8598f42df59e3d9139e56a7 (patch)
treeb6acf05aa073e97ae8e58647bf05c2c3e816f041 /src
parent995d9c4ac7fb046e01196cec308ebe10002a28da (diff)
parentde862f03accb12b044ced60cb98f47a055457223 (diff)
Merge remote-tracking branch 'mesa-public/master' into vulkan
Diffstat (limited to 'src')
-rw-r--r--src/Makefile.am1
-rw-r--r--src/gallium/auxiliary/Makefile.sources2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi.c9
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c19
-rw-r--r--src/gallium/auxiliary/hud/hud_context.c3
-rw-r--r--src/gallium/auxiliary/nir/tgsi_to_nir.c2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump.c1
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_emulate.c169
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_emulate.h38
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.c2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.h1
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.c26
-rw-r--r--src/gallium/auxiliary/util/u_debug.c6
-rw-r--r--src/gallium/auxiliary/util/u_inlines.h22
-rw-r--r--src/gallium/auxiliary/util/u_vbuf.c1
-rw-r--r--src/gallium/drivers/freedreno/Makefile.am3
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_emit.c6
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_emit.h11
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_gmem.c2
-rw-r--r--src/gallium/drivers/freedreno/freedreno_draw.c3
-rw-r--r--src/gallium/drivers/freedreno/freedreno_screen.c3
-rw-r--r--src/gallium/drivers/freedreno/freedreno_util.h3
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c1
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_nir.h1
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_shader.h2
-rw-r--r--src/gallium/drivers/ilo/core/ilo_builder.c2
-rw-r--r--src/gallium/drivers/ilo/core/ilo_core.h2
-rw-r--r--src/gallium/drivers/ilo/core/ilo_debug.h2
-rw-r--r--src/gallium/drivers/ilo/core/ilo_image.c4
-rw-r--r--src/gallium/drivers/ilo/core/ilo_image.h2
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_cc.c12
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_raster.c14
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_sbe.c12
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_shader_ps.c7
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_surface.c4
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_vf.c8
-rw-r--r--src/gallium/drivers/ilo/genhw/gen_eu_message.xml.h3
-rw-r--r--src/gallium/drivers/ilo/genhw/gen_mi.xml.h96
-rw-r--r--src/gallium/drivers/ilo/genhw/gen_regs.xml.h17
-rw-r--r--src/gallium/drivers/ilo/genhw/gen_render.xml.h16
-rw-r--r--src/gallium/drivers/ilo/genhw/gen_render_3d.xml.h278
-rw-r--r--src/gallium/drivers/ilo/genhw/gen_render_dynamic.xml.h18
-rw-r--r--src/gallium/drivers/ilo/genhw/gen_render_media.xml.h6
-rw-r--r--src/gallium/drivers/ilo/genhw/gen_render_surface.xml.h36
-rw-r--r--src/gallium/drivers/ilo/ilo_common.h1
-rw-r--r--src/gallium/drivers/ilo/ilo_shader.c25
-rw-r--r--src/gallium/drivers/ilo/ilo_shader.h9
-rw-r--r--src/gallium/drivers/nouveau/Makefile.sources9
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp20
-rw-r--r--src/gallium/drivers/nouveau/nouveau_fence.c18
-rw-r--r--src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c1
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_shader_state.c9
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_transfer.c16
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_compute.c15
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_context.h12
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query.c1400
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query.h39
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c491
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h56
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c440
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h42
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c1387
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.h120
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query_sw.c162
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query_sw.h64
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_screen.c8
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_screen.h145
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c5
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_state.c22
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c20
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c5
-rw-r--r--src/gallium/drivers/r600/r600_pipe.c2
-rw-r--r--src/gallium/drivers/r600/r600_shader.c10
-rw-r--r--src/gallium/drivers/r600/r600_shader.h2
-rw-r--r--src/gallium/drivers/r600/r600_uvd.c30
-rw-r--r--src/gallium/drivers/r600/sb/sb_bc.h14
-rw-r--r--src/gallium/drivers/r600/sb/sb_bc_dump.cpp17
-rw-r--r--src/gallium/drivers/r600/sb/sb_bc_finalize.cpp9
-rw-r--r--src/gallium/drivers/r600/sb/sb_bc_parser.cpp61
-rw-r--r--src/gallium/drivers/r600/sb/sb_expr.cpp3
-rw-r--r--src/gallium/drivers/r600/sb/sb_gcm.cpp11
-rw-r--r--src/gallium/drivers/r600/sb/sb_ir.h7
-rw-r--r--src/gallium/drivers/r600/sb/sb_sched.cpp220
-rw-r--r--src/gallium/drivers/r600/sb/sb_sched.h9
-rw-r--r--src/gallium/drivers/r600/sb/sb_shader.cpp4
-rw-r--r--src/gallium/drivers/r600/sb/sb_shader.h2
-rw-r--r--src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c118
-rw-r--r--src/gallium/drivers/radeonsi/si_compute.c4
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.c4
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.c142
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.h11
-rw-r--r--src/gallium/drivers/radeonsi/si_state.c4
-rw-r--r--src/gallium/drivers/radeonsi/si_state.h1
-rw-r--r--src/gallium/drivers/radeonsi/si_state_shaders.c206
-rw-r--r--src/gallium/drivers/svga/svga_context.c2
-rw-r--r--src/gallium/drivers/svga/svga_context.h34
-rw-r--r--src/gallium/drivers/svga/svga_pipe_blend.c3
-rw-r--r--src/gallium/drivers/svga/svga_pipe_depthstencil.c3
-rw-r--r--src/gallium/drivers/svga/svga_pipe_draw.c4
-rw-r--r--src/gallium/drivers/svga/svga_pipe_query.c97
-rw-r--r--src/gallium/drivers/svga/svga_pipe_rasterizer.c3
-rw-r--r--src/gallium/drivers/svga/svga_pipe_sampler.c3
-rw-r--r--src/gallium/drivers/svga/svga_pipe_vertex.c4
-rw-r--r--src/gallium/drivers/svga/svga_resource_buffer.c16
-rw-r--r--src/gallium/drivers/svga/svga_resource_buffer.h3
-rw-r--r--src/gallium/drivers/svga/svga_resource_texture.c27
-rw-r--r--src/gallium/drivers/svga/svga_screen.c19
-rw-r--r--src/gallium/drivers/svga/svga_screen.h8
-rw-r--r--src/gallium/drivers/svga/svga_shader.c10
-rw-r--r--src/gallium/drivers/svga/svga_shader.h3
-rw-r--r--src/gallium/drivers/svga/svga_state.c3
-rw-r--r--src/gallium/drivers/svga/svga_state_constants.c2
-rw-r--r--src/gallium/drivers/svga/svga_state_fs.c3
-rw-r--r--src/gallium/drivers/svga/svga_state_gs.c10
-rw-r--r--src/gallium/drivers/svga/svga_state_vs.c3
-rw-r--r--src/gallium/drivers/svga/svga_surface.c4
-rw-r--r--src/gallium/drivers/svga/svga_tgsi.c5
-rw-r--r--src/gallium/drivers/svga/svga_tgsi.h3
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_vgpu10.c2
-rw-r--r--src/gallium/state_trackers/va/image.c10
-rw-r--r--src/gallium/targets/d3dadapter9/Makefile.am1
-rw-r--r--src/gallium/targets/pipe-loader/Makefile.am1
-rw-r--r--src/gallium/targets/xa/Makefile.am1
-rw-r--r--src/glsl/Makefile.am5
-rw-r--r--src/glsl/Makefile.sources11
-rw-r--r--src/glsl/SConscript7
-rw-r--r--src/glsl/ast.h27
-rw-r--r--src/glsl/ast_array_index.cpp25
-rw-r--r--src/glsl/ast_function.cpp95
-rw-r--r--src/glsl/ast_to_hir.cpp277
-rw-r--r--src/glsl/builtin_functions.cpp2
-rw-r--r--src/glsl/builtin_types.cpp4
-rw-r--r--src/glsl/glsl_parser.yy24
-rw-r--r--src/glsl/glsl_parser_extras.h14
-rw-r--r--src/glsl/ir.cpp20
-rw-r--r--src/glsl/ir.h2
-rw-r--r--src/glsl/ir_constant_expression.cpp1
-rw-r--r--src/glsl/ir_set_program_inouts.cpp6
-rw-r--r--src/glsl/ir_uniform.h32
-rw-r--r--src/glsl/ir_variable_refcount.cpp26
-rw-r--r--src/glsl/ir_variable_refcount.h13
-rw-r--r--src/glsl/link_atomics.cpp77
-rw-r--r--src/glsl/link_uniform_block_active_visitor.cpp168
-rw-r--r--src/glsl/link_uniform_block_active_visitor.h13
-rw-r--r--src/glsl/link_uniform_blocks.cpp160
-rw-r--r--src/glsl/link_uniform_initializers.cpp95
-rw-r--r--src/glsl/link_uniforms.cpp39
-rw-r--r--src/glsl/linker.cpp356
-rw-r--r--src/glsl/lower_named_interface_blocks.cpp52
-rw-r--r--src/glsl/lower_ubo_reference.cpp168
-rw-r--r--src/glsl/lower_vec_index_to_cond_assign.cpp4
-rw-r--r--src/glsl/lower_vector_insert.cpp6
-rw-r--r--src/glsl/nir/builtin_type_macros.h (renamed from src/glsl/builtin_type_macros.h)0
-rw-r--r--src/glsl/nir/glsl_to_nir.cpp98
-rw-r--r--src/glsl/nir/glsl_types.cpp (renamed from src/glsl/glsl_types.cpp)46
-rw-r--r--src/glsl/nir/glsl_types.h (renamed from src/glsl/glsl_types.h)0
-rw-r--r--src/glsl/nir/nir.c108
-rw-r--r--src/glsl/nir/nir.h59
-rw-r--r--src/glsl/nir/nir_constant_expressions.py1
-rw-r--r--src/glsl/nir/nir_instr_set.c519
-rw-r--r--src/glsl/nir/nir_instr_set.h62
-rw-r--r--src/glsl/nir/nir_intrinsics.h6
-rw-r--r--src/glsl/nir/nir_lower_atomics.c22
-rw-r--r--src/glsl/nir/nir_opt_cse.c293
-rw-r--r--src/glsl/nir/nir_sweep.c2
-rw-r--r--src/glsl/nir/nir_types.cpp6
-rw-r--r--src/glsl/nir/nir_types.h4
-rw-r--r--src/glsl/nir/shader_enums.c (renamed from src/glsl/shader_enums.c)10
-rw-r--r--src/glsl/nir/shader_enums.h (renamed from src/glsl/shader_enums.h)24
-rw-r--r--src/glsl/opt_dead_code.cpp35
-rw-r--r--src/glsl/opt_tree_grafting.cpp2
-rw-r--r--src/glsl/standalone_scaffolding.cpp11
-rw-r--r--src/mesa/Android.libmesa_dricore.mk3
-rw-r--r--src/mesa/Android.libmesa_glsl_utils.mk2
-rw-r--r--src/mesa/Android.libmesa_st_mesa.mk1
-rw-r--r--src/mesa/Makefile.sources7
-rw-r--r--src/mesa/SConscript1
-rw-r--r--src/mesa/drivers/common/meta_copy_image.c5
-rw-r--r--src/mesa/drivers/dri/i915/i915_fragprog.c10
-rw-r--r--src/mesa/drivers/dri/i965/Android.mk1
-rw-r--r--src/mesa/drivers/dri/i965/Makefile.am1
-rw-r--r--src/mesa/drivers/dri/i965/Makefile.sources1
-rw-r--r--src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_cfg.cpp10
-rw-r--r--src/mesa/drivers/dri/i965/brw_clear.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_compiler.h661
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h358
-rw-r--r--src/mesa/drivers/dri/i965/brw_cs.c10
-rw-r--r--src/mesa/drivers/dri/i965/brw_cs.h15
-rw-r--r--src/mesa/drivers/dri/i965/brw_cubemap_normalize.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_emit.c27
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp274
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h7
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp15
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_generator.cpp10
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp17
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_nir.cpp260
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp15
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_validate.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_visitor.cpp20
-rw-r--r--src/mesa/drivers/dri/i965/brw_gs.c6
-rw-r--r--src/mesa/drivers/dri/i965/brw_gs_surface_state.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_ir_fs.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_ir_vec4.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_lower_unnormalized_offset.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_meta_fast_clear.c5
-rw-r--r--src/mesa/drivers/dri/i965/brw_nir.c96
-rw-r--r--src/mesa/drivers/dri/i965/brw_program.c10
-rw-r--r--src/mesa/drivers/dri/i965/brw_program.h124
-rw-r--r--src/mesa/drivers/dri/i965/brw_sampler_state.c1
-rw-r--r--src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_shader.cpp9
-rw-r--r--src/mesa/drivers/dri/i965/brw_shader.h62
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.cpp143
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.h12
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp16
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_generator.cpp26
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp66
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h32
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp16
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_nir.cpp54
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.c30
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.h10
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_surface_state.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.c114
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.h15
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_surface_state.c89
-rw-r--r--src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/gen6_gs_visitor.h2
-rw-r--r--src/mesa/drivers/dri/i965/gen7_gs_state.c4
-rw-r--r--src/mesa/drivers/dri/i965/gen7_vs_state.c4
-rw-r--r--src/mesa/drivers/dri/i965/gen7_wm_state.c12
-rw-r--r--src/mesa/drivers/dri/i965/gen8_gs_state.c4
-rw-r--r--src/mesa/drivers/dri/i965/gen8_ps_state.c32
-rw-r--r--src/mesa/drivers/dri/i965/gen8_surface_state.c8
-rw-r--r--src/mesa/drivers/dri/i965/gen8_vs_state.c4
-rw-r--r--src/mesa/drivers/dri/i965/intel_asm_annotation.c17
-rw-r--r--src/mesa/drivers/dri/i965/intel_asm_annotation.h3
-rw-r--r--src/mesa/drivers/dri/i965/intel_mipmap_tree.c8
-rw-r--r--src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp2
-rw-r--r--src/mesa/drivers/dri/r200/r200_vertprog.c17
-rw-r--r--src/mesa/drivers/x11/SConscript2
-rw-r--r--src/mesa/main/blend.c161
-rw-r--r--src/mesa/main/es1_conversion.c20
-rw-r--r--src/mesa/main/ff_fragment_shader.cpp12
-rw-r--r--src/mesa/main/ffvertex_prog.c3
-rw-r--r--src/mesa/main/format_utils.h1
-rw-r--r--src/mesa/main/imports.c148
-rw-r--r--src/mesa/main/imports.h7
-rw-r--r--src/mesa/main/matrix.c1
-rw-r--r--src/mesa/main/mipmap.c1
-rw-r--r--src/mesa/main/mtypes.h97
-rw-r--r--src/mesa/main/pack.c15
-rw-r--r--src/mesa/main/shader_query.cpp224
-rw-r--r--src/mesa/main/shaderapi.c10
-rw-r--r--src/mesa/main/shaderimage.c21
-rw-r--r--src/mesa/main/shaderimage.h12
-rw-r--r--src/mesa/main/shaderobj.c4
-rw-r--r--src/mesa/main/shared.c5
-rw-r--r--src/mesa/main/state.c6
-rw-r--r--src/mesa/main/texcompress_bptc.c1
-rw-r--r--src/mesa/main/texobj.c78
-rw-r--r--src/mesa/main/texobj.h3
-rw-r--r--src/mesa/main/texstate.c3
-rw-r--r--src/mesa/main/textureview.c2
-rw-r--r--src/mesa/main/uniform_query.cpp53
-rw-r--r--src/mesa/main/uniforms.c12
-rw-r--r--src/mesa/main/uniforms.h2
-rw-r--r--src/mesa/main/version.c19
-rw-r--r--src/mesa/program/Android.mk1
-rw-r--r--src/mesa/program/ir_to_mesa.cpp2
-rw-r--r--src/mesa/program/prog_to_nir.c32
-rw-r--r--src/mesa/program/program.c638
-rw-r--r--src/mesa/program/program.h87
-rw-r--r--src/mesa/program/sampler.cpp2
-rw-r--r--src/mesa/state_tracker/st_atom_clip.c5
-rw-r--r--src/mesa/state_tracker/st_atom_constbuf.c2
-rw-r--r--src/mesa/state_tracker/st_atom_pixeltransfer.c225
-rw-r--r--src/mesa/state_tracker/st_cb_bitmap.c145
-rw-r--r--src/mesa/state_tracker/st_cb_bitmap.h11
-rw-r--r--src/mesa/state_tracker/st_cb_bitmap_shader.c174
-rw-r--r--src/mesa/state_tracker/st_cb_drawpixels.c335
-rw-r--r--src/mesa/state_tracker/st_cb_drawpixels.h17
-rw-r--r--src/mesa/state_tracker/st_cb_drawpixels_shader.c278
-rw-r--r--src/mesa/state_tracker/st_cb_fbo.c2
-rw-r--r--src/mesa/state_tracker/st_cb_program.c27
-rw-r--r--src/mesa/state_tracker/st_context.c6
-rw-r--r--src/mesa/state_tracker/st_context.h9
-rw-r--r--src/mesa/state_tracker/st_debug.c2
-rw-r--r--src/mesa/state_tracker/st_glsl_to_tgsi.cpp260
-rw-r--r--src/mesa/state_tracker/st_glsl_to_tgsi.h10
-rw-r--r--src/mesa/state_tracker/st_mesa_to_tgsi.c50
-rw-r--r--src/mesa/state_tracker/st_mesa_to_tgsi.h4
-rw-r--r--src/mesa/state_tracker/st_program.c566
-rw-r--r--src/mesa/state_tracker/st_program.h47
-rw-r--r--src/mesa/tnl/t_draw.c1
-rw-r--r--src/mesa/vbo/vbo_context.c104
-rw-r--r--src/mesa/vbo/vbo_exec.h19
-rw-r--r--src/mesa/vbo/vbo_exec_api.c61
-rw-r--r--src/mesa/vbo/vbo_exec_draw.c25
-rw-r--r--src/mesa/vbo/vbo_save_api.c5
-rw-r--r--src/util/Makefile.sources2
-rw-r--r--src/util/half_float.c177
-rw-r--r--src/util/half_float.h41
-rw-r--r--src/vulkan/Makefile.am1
-rw-r--r--src/vulkan/anv_compiler.cpp34
-rw-r--r--src/vulkan/anv_meta.c42
-rw-r--r--src/vulkan/anv_nir_builder.h46
318 files changed, 10004 insertions, 6872 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index 13cfaa5b367..da638a811fb 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -66,7 +66,6 @@ AM_CPPFLAGS = \
noinst_LTLIBRARIES = libglsl_util.la
libglsl_util_la_SOURCES = \
- glsl/shader_enums.c \
mesa/main/imports.c \
mesa/program/prog_hash_table.c \
mesa/program/symbol_table.c \
diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources
index 1fa36416b8e..9df4e265b5b 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -137,6 +137,8 @@ C_SOURCES := \
tgsi/tgsi_dump.h \
tgsi/tgsi_exec.c \
tgsi/tgsi_exec.h \
+ tgsi/tgsi_emulate.c \
+ tgsi/tgsi_emulate.h \
tgsi/tgsi_info.c \
tgsi/tgsi_info.h \
tgsi/tgsi_iterate.c \
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
index c4ae30461cb..c88dfbf974a 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
@@ -129,7 +129,8 @@ lp_build_emit_llvm_unary(
unsigned tgsi_opcode,
LLVMValueRef arg0)
{
- struct lp_build_emit_data emit_data;
+ struct lp_build_emit_data emit_data = {{0}};
+ emit_data.info = tgsi_get_opcode_info(tgsi_opcode);
emit_data.arg_count = 1;
emit_data.args[0] = arg0;
return lp_build_emit_llvm(bld_base, tgsi_opcode, &emit_data);
@@ -142,7 +143,8 @@ lp_build_emit_llvm_binary(
LLVMValueRef arg0,
LLVMValueRef arg1)
{
- struct lp_build_emit_data emit_data;
+ struct lp_build_emit_data emit_data = {{0}};
+ emit_data.info = tgsi_get_opcode_info(tgsi_opcode);
emit_data.arg_count = 2;
emit_data.args[0] = arg0;
emit_data.args[1] = arg1;
@@ -157,7 +159,8 @@ lp_build_emit_llvm_ternary(
LLVMValueRef arg1,
LLVMValueRef arg2)
{
- struct lp_build_emit_data emit_data;
+ struct lp_build_emit_data emit_data = {{0}};
+ emit_data.info = tgsi_get_opcode_info(tgsi_opcode);
emit_data.arg_count = 3;
emit_data.args[0] = arg0;
emit_data.args[1] = arg1;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
index 0ad78b0ace2..3d5e2cb316b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -538,12 +538,19 @@ lrp_emit(
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
- LLVMValueRef tmp;
- tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_SUB,
- emit_data->args[1],
- emit_data->args[2]);
- emit_data->output[emit_data->chan] = lp_build_emit_llvm_ternary(bld_base,
- TGSI_OPCODE_MAD, emit_data->args[0], tmp, emit_data->args[2]);
+ struct lp_build_context *bld = &bld_base->base;
+ LLVMValueRef inv, a, b;
+
+ /* This uses the correct version: (1 - t)*a + t*b
+ *
+ * An alternative version is "a + t*(b-a)". The problem is this version
+ * doesn't return "b" for t = 1, because "a + (b-a)" isn't equal to "b"
+ * because of the floating-point rounding.
+ */
+ inv = lp_build_sub(bld, bld_base->base.one, emit_data->args[0]);
+ a = lp_build_mul(bld, emit_data->args[1], emit_data->args[0]);
+ b = lp_build_mul(bld, emit_data->args[2], inv);
+ emit_data->output[emit_data->chan] = lp_build_add(bld, a, b);
}
/* TGSI_OPCODE_MAD */
diff --git a/src/gallium/auxiliary/hud/hud_context.c b/src/gallium/auxiliary/hud/hud_context.c
index 95eed2698bc..ffe30b8fa79 100644
--- a/src/gallium/auxiliary/hud/hud_context.c
+++ b/src/gallium/auxiliary/hud/hud_context.c
@@ -987,6 +987,9 @@ hud_parse_env_var(struct hud_context *hud, const char *env)
case ',':
env++;
+ if (!pane)
+ break;
+
y += height + hud->font.glyph_height * (pane->num_graphs + 2);
height = 100;
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index cf43ef2506f..0539cfc16a1 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -27,7 +27,7 @@
#include "glsl/nir/nir_control_flow.h"
#include "glsl/nir/nir_builder.h"
#include "glsl/list.h"
-#include "glsl/shader_enums.h"
+#include "glsl/nir/shader_enums.h"
#include "nir/tgsi_to_nir.h"
#include "tgsi/tgsi_parse.h"
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 8ceb5b47584..5d80cca5b0e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -648,6 +648,7 @@ tgsi_dump_instruction(
ctx.indent = 0;
ctx.dump_printf = dump_ctx_printf;
ctx.indentation = 0;
+ ctx.file = NULL;
iter_instruction( &ctx.iter, (struct tgsi_full_instruction *)inst );
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_emulate.c b/src/gallium/auxiliary/tgsi/tgsi_emulate.c
new file mode 100644
index 00000000000..59d2e4c95b1
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_emulate.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2015 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include "tgsi/tgsi_transform.h"
+#include "tgsi/tgsi_scan.h"
+#include "tgsi/tgsi_dump.h"
+#include "util/u_debug.h"
+
+#include "tgsi_emulate.h"
+
+struct tgsi_emulation_context {
+ struct tgsi_transform_context base;
+ struct tgsi_shader_info info;
+ unsigned flags;
+ bool first_instruction_emitted;
+};
+
+static inline struct tgsi_emulation_context *
+tgsi_emulation_context(struct tgsi_transform_context *tctx)
+{
+ return (struct tgsi_emulation_context *)tctx;
+}
+
+static void
+transform_decl(struct tgsi_transform_context *tctx,
+ struct tgsi_full_declaration *decl)
+{
+ struct tgsi_emulation_context *ctx = tgsi_emulation_context(tctx);
+
+ if (ctx->flags & TGSI_EMU_FORCE_PERSAMPLE_INTERP &&
+ decl->Declaration.File == TGSI_FILE_INPUT) {
+ assert(decl->Declaration.Interpolate);
+ decl->Interp.Location = TGSI_INTERPOLATE_LOC_SAMPLE;
+ }
+
+ tctx->emit_declaration(tctx, decl);
+}
+
+static void
+passthrough_edgeflag(struct tgsi_transform_context *tctx)
+{
+ struct tgsi_emulation_context *ctx = tgsi_emulation_context(tctx);
+ struct tgsi_full_declaration decl;
+ struct tgsi_full_instruction new_inst;
+
+ /* Input */
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_INPUT;
+ decl.Range.First = decl.Range.Last = ctx->info.num_inputs;
+ tctx->emit_declaration(tctx, &decl);
+
+ /* Output */
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_OUTPUT;
+ decl.Declaration.Semantic = true;
+ decl.Range.First = decl.Range.Last = ctx->info.num_outputs;
+ decl.Semantic.Name = TGSI_SEMANTIC_EDGEFLAG;
+ decl.Semantic.Index = 0;
+ tctx->emit_declaration(tctx, &decl);
+
+ /* MOV */
+ new_inst = tgsi_default_full_instruction();
+ new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
+
+ new_inst.Instruction.NumDstRegs = 1;
+ new_inst.Dst[0].Register.File = TGSI_FILE_OUTPUT;
+ new_inst.Dst[0].Register.Index = ctx->info.num_outputs;
+ new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
+
+ new_inst.Instruction.NumSrcRegs = 1;
+ new_inst.Src[0].Register.File = TGSI_FILE_INPUT;
+ new_inst.Src[0].Register.Index = ctx->info.num_inputs;
+ new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
+ new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
+ new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
+ new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
+
+ tctx->emit_instruction(tctx, &new_inst);
+}
+
+static void
+transform_instr(struct tgsi_transform_context *tctx,
+ struct tgsi_full_instruction *inst)
+{
+ struct tgsi_emulation_context *ctx = tgsi_emulation_context(tctx);
+
+ /* Pass through edgeflags. */
+ if (!ctx->first_instruction_emitted) {
+ ctx->first_instruction_emitted = true;
+
+ if (ctx->flags & TGSI_EMU_PASSTHROUGH_EDGEFLAG)
+ passthrough_edgeflag(tctx);
+ }
+
+ /* Clamp color outputs. */
+ if (ctx->flags & TGSI_EMU_CLAMP_COLOR_OUTPUTS) {
+ int i;
+ for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
+ unsigned semantic;
+
+ if (inst->Dst[i].Register.File != TGSI_FILE_OUTPUT ||
+ inst->Dst[i].Register.Indirect)
+ continue;
+
+ semantic =
+ ctx->info.output_semantic_name[inst->Dst[i].Register.Index];
+
+ if (semantic == TGSI_SEMANTIC_COLOR ||
+ semantic == TGSI_SEMANTIC_BCOLOR)
+ inst->Instruction.Saturate = true;
+ }
+ }
+
+ tctx->emit_instruction(tctx, inst);
+}
+
+const struct tgsi_token *
+tgsi_emulate(const struct tgsi_token *tokens, unsigned flags)
+{
+ struct tgsi_emulation_context ctx;
+ struct tgsi_token *newtoks;
+ int newlen;
+
+ if (!(flags & (TGSI_EMU_CLAMP_COLOR_OUTPUTS |
+ TGSI_EMU_PASSTHROUGH_EDGEFLAG |
+ TGSI_EMU_FORCE_PERSAMPLE_INTERP)))
+ return NULL;
+
+ memset(&ctx, 0, sizeof(ctx));
+ ctx.flags = flags;
+ tgsi_scan_shader(tokens, &ctx.info);
+
+ if (flags & TGSI_EMU_FORCE_PERSAMPLE_INTERP)
+ ctx.base.transform_declaration = transform_decl;
+
+ if (flags & (TGSI_EMU_CLAMP_COLOR_OUTPUTS |
+ TGSI_EMU_PASSTHROUGH_EDGEFLAG))
+ ctx.base.transform_instruction = transform_instr;
+
+ newlen = tgsi_num_tokens(tokens) + 20;
+ newtoks = tgsi_alloc_tokens(newlen);
+ if (!newtoks)
+ return NULL;
+
+ tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
+ return newtoks;
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_emulate.h b/src/gallium/auxiliary/tgsi/tgsi_emulate.h
new file mode 100644
index 00000000000..425cec72ee1
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_emulate.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2015 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef TGSI_GL_EMULATION_H_
+#define TGSI_GL_EMULATION_H_
+
+#include "pipe/p_shader_tokens.h"
+
+#define TGSI_EMU_CLAMP_COLOR_OUTPUTS (1 << 0)
+#define TGSI_EMU_PASSTHROUGH_EDGEFLAG (1 << 1)
+#define TGSI_EMU_FORCE_PERSAMPLE_INTERP (1 << 2)
+
+const struct tgsi_token *
+tgsi_emulate(const struct tgsi_token *tokens, unsigned flags);
+
+#endif /* TGSI_GL_EMULATION_H_ */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index d76dddbf7d9..b84a1753eeb 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -409,6 +409,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
info->writes_edgeflag = TRUE;
}
}
+ } else if (file == TGSI_FILE_SAMPLER) {
+ info->samplers_declared |= 1 << reg;
}
}
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h
index 3ceb55717ee..d60ccabda6d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -64,6 +64,7 @@ struct tgsi_shader_info
uint file_count[TGSI_FILE_COUNT]; /**< number of declared registers */
int file_max[TGSI_FILE_COUNT]; /**< highest index of declared registers */
int const_file_max[PIPE_MAX_CONSTANT_BUFFERS];
+ unsigned samplers_declared; /**< bitmask of declared samplers */
ubyte input_array_first[PIPE_MAX_SHADER_INPUTS];
ubyte input_array_last[PIPE_MAX_SHADER_INPUTS];
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index 3d213195090..f2f518130fb 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -35,6 +35,7 @@
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_sanity.h"
#include "util/u_debug.h"
+#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_bitmask.h"
@@ -1830,29 +1831,6 @@ void ureg_free_tokens( const struct tgsi_token *tokens )
}
-static inline unsigned
-pipe_shader_from_tgsi_processor(unsigned processor)
-{
- switch (processor) {
- case TGSI_PROCESSOR_VERTEX:
- return PIPE_SHADER_VERTEX;
- case TGSI_PROCESSOR_TESS_CTRL:
- return PIPE_SHADER_TESS_CTRL;
- case TGSI_PROCESSOR_TESS_EVAL:
- return PIPE_SHADER_TESS_EVAL;
- case TGSI_PROCESSOR_GEOMETRY:
- return PIPE_SHADER_GEOMETRY;
- case TGSI_PROCESSOR_FRAGMENT:
- return PIPE_SHADER_FRAGMENT;
- case TGSI_PROCESSOR_COMPUTE:
- return PIPE_SHADER_COMPUTE;
- default:
- assert(0);
- return PIPE_SHADER_VERTEX;
- }
-}
-
-
struct ureg_program *
ureg_create(unsigned processor)
{
@@ -1872,7 +1850,7 @@ ureg_create_with_screen(unsigned processor, struct pipe_screen *screen)
ureg->supports_any_inout_decl_range =
screen &&
screen->get_shader_param(screen,
- pipe_shader_from_tgsi_processor(processor),
+ util_pipe_shader_from_tgsi_processor(processor),
PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE) != 0;
for (i = 0; i < Elements(ureg->properties); i++)
diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c
index 5fe9e33e208..7388a499c74 100644
--- a/src/gallium/auxiliary/util/u_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -276,7 +276,7 @@ debug_get_flags_option(const char *name,
for (; flags->name; ++flags)
namealign = MAX2(namealign, strlen(flags->name));
for (flags = orig; flags->name; ++flags)
- _debug_printf("| %*s [0x%0*"PRIu64"]%s%s\n", namealign, flags->name,
+ _debug_printf("| %*s [0x%0*"PRIx64"]%s%s\n", namealign, flags->name,
(int)sizeof(uint64_t)*CHAR_BIT/4, flags->value,
flags->desc ? " " : "", flags->desc ? flags->desc : "");
}
@@ -291,9 +291,9 @@ debug_get_flags_option(const char *name,
if (debug_get_option_should_print()) {
if (str) {
- debug_printf("%s: %s = 0x%"PRIu64" (%s)\n", __FUNCTION__, name, result, str);
+ debug_printf("%s: %s = 0x%"PRIx64" (%s)\n", __FUNCTION__, name, result, str);
} else {
- debug_printf("%s: %s = 0x%"PRIu64"\n", __FUNCTION__, name, result);
+ debug_printf("%s: %s = 0x%"PRIx64"\n", __FUNCTION__, name, result);
}
}
diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h
index bb99a02ce49..384e267b593 100644
--- a/src/gallium/auxiliary/util/u_inlines.h
+++ b/src/gallium/auxiliary/util/u_inlines.h
@@ -651,6 +651,28 @@ util_max_layer(const struct pipe_resource *r, unsigned level)
}
}
+static inline unsigned
+util_pipe_shader_from_tgsi_processor(unsigned processor)
+{
+ switch (processor) {
+ case TGSI_PROCESSOR_VERTEX:
+ return PIPE_SHADER_VERTEX;
+ case TGSI_PROCESSOR_TESS_CTRL:
+ return PIPE_SHADER_TESS_CTRL;
+ case TGSI_PROCESSOR_TESS_EVAL:
+ return PIPE_SHADER_TESS_EVAL;
+ case TGSI_PROCESSOR_GEOMETRY:
+ return PIPE_SHADER_GEOMETRY;
+ case TGSI_PROCESSOR_FRAGMENT:
+ return PIPE_SHADER_FRAGMENT;
+ case TGSI_PROCESSOR_COMPUTE:
+ return PIPE_SHADER_COMPUTE;
+ default:
+ assert(0);
+ return PIPE_SHADER_VERTEX;
+ }
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/src/gallium/auxiliary/util/u_vbuf.c b/src/gallium/auxiliary/util/u_vbuf.c
index 3d2193c3bf5..b31ada138b8 100644
--- a/src/gallium/auxiliary/util/u_vbuf.c
+++ b/src/gallium/auxiliary/util/u_vbuf.c
@@ -544,6 +544,7 @@ u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr,
index = ffs(unused_vb_mask) - 1;
fallback_vbs[type] = index;
+ unused_vb_mask &= ~(1 << index);
/*printf("found slot=%i for type=%i\n", index, type);*/
}
}
diff --git a/src/gallium/drivers/freedreno/Makefile.am b/src/gallium/drivers/freedreno/Makefile.am
index dff95ba5270..3de8e0fd5ad 100644
--- a/src/gallium/drivers/freedreno/Makefile.am
+++ b/src/gallium/drivers/freedreno/Makefile.am
@@ -19,7 +19,7 @@ libfreedreno_la_SOURCES = \
noinst_PROGRAMS = ir3_compiler
-# XXX: Required due to the C++ sources in libnir/libglsl_util
+# XXX: Required due to the C++ sources in libnir
nodist_EXTRA_ir3_compiler_SOURCES = dummy.cpp
ir3_compiler_SOURCES = \
ir3/ir3_cmdline.c
@@ -28,7 +28,6 @@ ir3_compiler_LDADD = \
libfreedreno.la \
$(top_builddir)/src/gallium/auxiliary/libgallium.la \
$(top_builddir)/src/glsl/libnir.la \
- $(top_builddir)/src/libglsl_util.la \
$(top_builddir)/src/util/libmesautil.la \
$(GALLIUM_COMMON_LIB_DEPS) \
$(FREEDRENO_LIBS)
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 6153d92dc21..411f5b76329 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -798,11 +798,7 @@ fd3_emit_restore(struct fd_context *ctx)
OUT_RING(ring, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(0) |
A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(0));
- OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
- OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0));
- OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) |
- A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) |
- A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE);
+ fd3_emit_cache_flush(ctx, ring);
OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
index 795654706a7..42483f6c39b 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
@@ -90,4 +90,15 @@ void fd3_emit_restore(struct fd_context *ctx);
void fd3_emit_init(struct pipe_context *pctx);
+static inline void
+fd3_emit_cache_flush(struct fd_context *ctx, struct fd_ringbuffer *ring)
+{
+ fd_wfi(ctx, ring);
+ OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
+ OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0));
+ OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) |
+ A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) |
+ A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE);
+}
+
#endif /* FD3_EMIT_H */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
index 9a5b45e2fcb..21fb59e450d 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
@@ -558,6 +558,8 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
OUT_RING(ring, fui(x1));
OUT_RING(ring, fui(y1));
+ fd3_emit_cache_flush(ctx, ring);
+
for (i = 0; i < 4; i++) {
OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c
index 6831a58749c..7bf3343f43a 100644
--- a/src/gallium/drivers/freedreno/freedreno_draw.c
+++ b/src/gallium/drivers/freedreno/freedreno_draw.c
@@ -187,6 +187,9 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
for (i = 0; i < ctx->streamout.num_targets; i++)
ctx->streamout.offsets[i] += prims;
+ if (fd_mesa_debug & FD_DBG_DDRAW)
+ ctx->dirty = 0xffffffff;
+
/* if an app (or, well, piglit test) does many thousands of draws
* without flush (or anything which implicitly flushes, like
* changing render targets), we can exceed the ringbuffer size.
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 0d0100590d6..b64f78ca32b 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -61,7 +61,7 @@ static const struct debug_named_value debug_options[] = {
{"msgs", FD_DBG_MSGS, "Print debug messages"},
{"disasm", FD_DBG_DISASM, "Dump TGSI and adreno shader disassembly"},
{"dclear", FD_DBG_DCLEAR, "Mark all state dirty after clear"},
- {"flush", FD_DBG_FLUSH, "Force flush after every draw"},
+ {"ddraw", FD_DBG_DDRAW, "Mark all state dirty after draw"},
{"noscis", FD_DBG_NOSCIS, "Disable scissor optimization"},
{"direct", FD_DBG_DIRECT, "Force inline (SS_DIRECT) state loads"},
{"nobypass", FD_DBG_NOBYPASS, "Disable GMEM bypass"},
@@ -70,6 +70,7 @@ static const struct debug_named_value debug_options[] = {
{"optmsgs", FD_DBG_OPTMSGS,"Enable optimizer debug messages"},
{"glsl120", FD_DBG_GLSL120,"Temporary flag to force GLSL 1.20 (rather than 1.30) on a3xx+"},
{"shaderdb", FD_DBG_SHADERDB, "Enable shaderdb output"},
+ {"flush", FD_DBG_FLUSH, "Force flush after every draw"},
DEBUG_NAMED_VALUE_END
};
diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h
index 7129a1bddd1..0d2418e1e00 100644
--- a/src/gallium/drivers/freedreno/freedreno_util.h
+++ b/src/gallium/drivers/freedreno/freedreno_util.h
@@ -63,7 +63,7 @@ enum adreno_stencil_op fd_stencil_op(unsigned op);
#define FD_DBG_MSGS 0x0001
#define FD_DBG_DISASM 0x0002
#define FD_DBG_DCLEAR 0x0004
-#define FD_DBG_FLUSH 0x0008
+#define FD_DBG_DDRAW 0x0008
#define FD_DBG_NOSCIS 0x0010
#define FD_DBG_DIRECT 0x0020
#define FD_DBG_NOBYPASS 0x0040
@@ -72,6 +72,7 @@ enum adreno_stencil_op fd_stencil_op(unsigned op);
#define FD_DBG_OPTMSGS 0x0200
#define FD_DBG_GLSL120 0x0400
#define FD_DBG_SHADERDB 0x0800
+#define FD_DBG_FLUSH 0x1000
extern int fd_mesa_debug;
extern bool fd_binning_enabled;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 7eddbdd3825..8c9234b3847 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -36,7 +36,6 @@
#include "tgsi/tgsi_strings.h"
#include "nir/tgsi_to_nir.h"
-#include "glsl/shader_enums.h"
#include "freedreno_util.h"
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.h b/src/gallium/drivers/freedreno/ir3/ir3_nir.h
index f3d3075e6a6..9950782dc38 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_nir.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.h
@@ -30,6 +30,7 @@
#define IR3_NIR_H_
#include "glsl/nir/nir.h"
+#include "glsl/nir/shader_enums.h"
bool ir3_nir_lower_if_else(nir_shader *shader);
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
index 6dc0ce1133f..7e2c27d9765 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -30,7 +30,7 @@
#define IR3_SHADER_H_
#include "pipe/p_state.h"
-#include "glsl/shader_enums.h"
+#include "glsl/nir/shader_enums.h"
#include "ir3.h"
#include "disasm.h"
diff --git a/src/gallium/drivers/ilo/core/ilo_builder.c b/src/gallium/drivers/ilo/core/ilo_builder.c
index 4e05a3aca1e..9d5195129b7 100644
--- a/src/gallium/drivers/ilo/core/ilo_builder.c
+++ b/src/gallium/drivers/ilo/core/ilo_builder.c
@@ -25,6 +25,8 @@
* Chia-I Wu <[email protected]>
*/
+#include "util/u_memory.h"
+
#include "ilo_builder.h"
#include "ilo_builder_render.h" /* for ilo_builder_batch_patch_sba() */
diff --git a/src/gallium/drivers/ilo/core/ilo_core.h b/src/gallium/drivers/ilo/core/ilo_core.h
index da7db90a54b..cbc568c4cd0 100644
--- a/src/gallium/drivers/ilo/core/ilo_core.h
+++ b/src/gallium/drivers/ilo/core/ilo_core.h
@@ -30,8 +30,6 @@
#include "pipe/p_compiler.h"
-#include "util/u_debug.h"
#include "util/u_math.h"
-#include "util/u_memory.h"
#endif /* ILO_CORE_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_debug.h b/src/gallium/drivers/ilo/core/ilo_debug.h
index 9833233d796..532a2aa7ed6 100644
--- a/src/gallium/drivers/ilo/core/ilo_debug.h
+++ b/src/gallium/drivers/ilo/core/ilo_debug.h
@@ -28,6 +28,8 @@
#ifndef ILO_DEBUG_H
#define ILO_DEBUG_H
+#include "util/u_debug.h"
+
#include "ilo_core.h"
/* enable debug flags affecting hot pathes only with debug builds */
diff --git a/src/gallium/drivers/ilo/core/ilo_image.c b/src/gallium/drivers/ilo/core/ilo_image.c
index fa547ac5c36..6eefc8f46d2 100644
--- a/src/gallium/drivers/ilo/core/ilo_image.c
+++ b/src/gallium/drivers/ilo/core/ilo_image.c
@@ -286,8 +286,8 @@ image_get_gen6_tiling(const struct ilo_dev *dev,
info->bind_surface_dp_typed))
return GEN6_TILING_NONE;
- if (estimated_size <= 64 ||
- estimated_size > info->prefer_linear_threshold)
+ if (estimated_size <= 64 || (info->prefer_linear_threshold &&
+ estimated_size > info->prefer_linear_threshold))
return GEN6_TILING_NONE;
if (estimated_size <= 2048)
diff --git a/src/gallium/drivers/ilo/core/ilo_image.h b/src/gallium/drivers/ilo/core/ilo_image.h
index 646ed6f5727..546e0ff7739 100644
--- a/src/gallium/drivers/ilo/core/ilo_image.h
+++ b/src/gallium/drivers/ilo/core/ilo_image.h
@@ -102,7 +102,7 @@ struct ilo_image_info {
/*
* prefer GEN6_TILING_NONE when the (estimated) image size exceeds the
- * threshold
+ * threshold; ignored when zero
*/
uint32_t prefer_linear_threshold;
diff --git a/src/gallium/drivers/ilo/core/ilo_state_cc.c b/src/gallium/drivers/ilo/core/ilo_state_cc.c
index 83ee8de979c..1f2456e19ea 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_cc.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_cc.c
@@ -694,10 +694,10 @@ cc_set_gen8_3DSTATE_PS_BLEND(struct ilo_state_cc *cc,
cc_get_gen6_effective_rt(dev, info, 0, &rt0);
/* 0x0 is reserved for blend factors and we have to set them all */
- dw1 |= rt0.a_src << GEN8_PS_BLEND_DW1_SRC_ALPHA_FACTOR__SHIFT |
- rt0.a_dst << GEN8_PS_BLEND_DW1_DST_ALPHA_FACTOR__SHIFT |
- rt0.rgb_src << GEN8_PS_BLEND_DW1_SRC_COLOR_FACTOR__SHIFT |
- rt0.rgb_dst << GEN8_PS_BLEND_DW1_DST_COLOR_FACTOR__SHIFT;
+ dw1 |= rt0.a_src << GEN8_PS_BLEND_DW1_RT0_SRC_ALPHA_FACTOR__SHIFT |
+ rt0.a_dst << GEN8_PS_BLEND_DW1_RT0_DST_ALPHA_FACTOR__SHIFT |
+ rt0.rgb_src << GEN8_PS_BLEND_DW1_RT0_SRC_COLOR_FACTOR__SHIFT |
+ rt0.rgb_dst << GEN8_PS_BLEND_DW1_RT0_DST_COLOR_FACTOR__SHIFT;
for (i = 0; i < blend->rt_count; i++) {
if (blend->rt[i].argb_write_disables != 0xf) {
@@ -707,10 +707,10 @@ cc_set_gen8_3DSTATE_PS_BLEND(struct ilo_state_cc *cc,
}
if (rt0.blend_enable) {
- dw1 |= GEN8_PS_BLEND_DW1_BLEND_ENABLE;
+ dw1 |= GEN8_PS_BLEND_DW1_RT0_BLEND_ENABLE;
if (rt0.a_src != rt0.rgb_src || rt0.a_dst != rt0.rgb_dst)
- dw1 |= GEN8_PS_BLEND_DW1_INDEPENDENT_ALPHA_ENABLE;
+ dw1 |= GEN8_PS_BLEND_DW1_RT0_INDEPENDENT_ALPHA_ENABLE;
}
}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_raster.c b/src/gallium/drivers/ilo/core/ilo_state_raster.c
index ed64a1f0d3c..a694f71bbbf 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_raster.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_raster.c
@@ -512,7 +512,7 @@ raster_set_gen8_3DSTATE_RASTER(struct ilo_state_raster *rs,
/* where should line_msaa_enable be set? */
if (setup->msaa_enable)
- dw1 |= GEN8_RASTER_DW1_API_MULTISAMPLE_ENABLE;
+ dw1 |= GEN8_RASTER_DW1_DX_MULTISAMPLE_ENABLE;
if (tri->depth_offset_solid)
dw1 |= GEN8_RASTER_DW1_DEPTH_OFFSET_SOLID;
@@ -574,10 +574,6 @@ get_gen6_sample_count(const struct ilo_dev *dev, uint8_t sample_count)
c = GEN7_NUMSAMPLES_8;
min_gen = ILO_GEN(7);
break;
- case 16:
- c = GEN8_NUMSAMPLES_16;
- min_gen = ILO_GEN(8);
- break;
default:
assert(!"unexpected sample count");
c = GEN6_NUMSAMPLES_1;
@@ -792,17 +788,17 @@ raster_set_gen8_3DSTATE_WM(struct ilo_state_raster *rs,
if (ilo_dev_gen(dev) < ILO_GEN(8)) {
switch (scan->earlyz_op) {
case ILO_STATE_RASTER_EARLYZ_DEPTH_CLEAR:
- dw1 |= GEN7_WM_DW1_DEPTH_CLEAR;
+ dw1 |= GEN7_WM_DW1_LEGACY_DEPTH_CLEAR;
break;
case ILO_STATE_RASTER_EARLYZ_DEPTH_RESOLVE:
- dw1 |= GEN7_WM_DW1_DEPTH_RESOLVE;
+ dw1 |= GEN7_WM_DW1_LEGACY_DEPTH_RESOLVE;
break;
case ILO_STATE_RASTER_EARLYZ_HIZ_RESOLVE:
- dw1 |= GEN7_WM_DW1_HIZ_RESOLVE;
+ dw1 |= GEN7_WM_DW1_LEGACY_HIZ_RESOLVE;
break;
default:
if (scan->earlyz_stencil_clear)
- dw1 |= GEN7_WM_DW1_DEPTH_CLEAR;
+ dw1 |= GEN7_WM_DW1_LEGACY_DEPTH_CLEAR;
break;
}
}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_sbe.c b/src/gallium/drivers/ilo/core/ilo_state_sbe.c
index 5d1d400acdd..1b4ca0683c9 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_sbe.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_sbe.c
@@ -239,8 +239,8 @@ sbe_set_gen8_3DSTATE_SBE(struct ilo_state_sbe *sbe,
vue_read_len << GEN7_SBE_DW1_URB_READ_LEN__SHIFT;
if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
- dw1 |= GEN8_SBE_DW1_USE_URB_READ_LEN |
- GEN8_SBE_DW1_USE_URB_READ_OFFSET |
+ dw1 |= GEN8_SBE_DW1_FORCE_URB_READ_LEN |
+ GEN8_SBE_DW1_FORCE_URB_READ_OFFSET |
vue_read_offset << GEN8_SBE_DW1_URB_READ_OFFSET__SHIFT;
} else {
dw1 |= vue_read_offset << GEN7_SBE_DW1_URB_READ_OFFSET__SHIFT;
@@ -286,10 +286,10 @@ sbe_set_gen8_3DSTATE_SBE_SWIZ(struct ilo_state_sbe *sbe,
swizzle->attr << GEN8_SBE_SWIZ_SRC_ATTR__SHIFT;
if (swizzle->force_zeros) {
- swiz[i] |= GEN8_SBE_SWIZ_OVERRIDE_W |
- GEN8_SBE_SWIZ_OVERRIDE_Z |
- GEN8_SBE_SWIZ_OVERRIDE_Y |
- GEN8_SBE_SWIZ_OVERRIDE_X |
+ swiz[i] |= GEN8_SBE_SWIZ_CONST_OVERRIDE_W |
+ GEN8_SBE_SWIZ_CONST_OVERRIDE_Z |
+ GEN8_SBE_SWIZ_CONST_OVERRIDE_Y |
+ GEN8_SBE_SWIZ_CONST_OVERRIDE_X |
GEN8_SBE_SWIZ_CONST_0000;
}
}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c b/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c
index f4d801e9b56..ceeb68a460e 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c
@@ -592,7 +592,12 @@ ps_set_gen8_3DSTATE_PS(struct ilo_state_ps *ps,
ILO_DEV_ASSERT(dev, 8, 8);
- dw3 = ff->sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
+ /*
+ * Set VME here for correct computation of LODs and others. Not sure why
+ * it is needed now.
+ */
+ dw3 = GEN6_THREADDISP_VME |
+ ff->sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
ff->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
if (false)
diff --git a/src/gallium/drivers/ilo/core/ilo_state_surface.c b/src/gallium/drivers/ilo/core/ilo_state_surface.c
index 40fe15f316f..27c37535fc8 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_surface.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_surface.c
@@ -814,10 +814,6 @@ surface_get_gen6_image_sample_count(const struct ilo_dev *dev,
*sample_count = GEN7_NUMSAMPLES_8;
min_gen = ILO_GEN(7);
break;
- case 16:
- *sample_count = GEN8_NUMSAMPLES_16;
- min_gen = ILO_GEN(8);
- break;
default:
assert(!"invalid sample count");
*sample_count = GEN6_NUMSAMPLES_1;
diff --git a/src/gallium/drivers/ilo/core/ilo_state_vf.c b/src/gallium/drivers/ilo/core/ilo_state_vf.c
index 9faf835fef2..8f091e21a27 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_vf.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_vf.c
@@ -369,14 +369,14 @@ vf_params_set_gen8_3DSTATE_VF_SGVS(struct ilo_state_vf *vf,
if (params->prepend_instanceid) {
dw1 |= GEN8_SGVS_DW1_IID_ENABLE |
- 1 << GEN8_SGVS_DW1_IID_VE_COMP__SHIFT |
- attr << GEN8_SGVS_DW1_IID_VE_INDEX__SHIFT;
+ 1 << GEN8_SGVS_DW1_IID_COMP__SHIFT |
+ attr << GEN8_SGVS_DW1_IID_OFFSET__SHIFT;
}
if (params->prepend_vertexid) {
dw1 |= GEN8_SGVS_DW1_VID_ENABLE |
- 0 << GEN8_SGVS_DW1_VID_VE_COMP__SHIFT |
- attr << GEN8_SGVS_DW1_VID_VE_INDEX__SHIFT;
+ 0 << GEN8_SGVS_DW1_VID_COMP__SHIFT |
+ attr << GEN8_SGVS_DW1_VID_OFFSET__SHIFT;
}
STATIC_ASSERT(ARRAY_SIZE(vf->sgvs) >= 1);
diff --git a/src/gallium/drivers/ilo/genhw/gen_eu_message.xml.h b/src/gallium/drivers/ilo/genhw/gen_eu_message.xml.h
index fe8b26908c0..96cf543d27e 100644
--- a/src/gallium/drivers/ilo/genhw/gen_eu_message.xml.h
+++ b/src/gallium/drivers/ilo/genhw/gen_eu_message.xml.h
@@ -41,7 +41,9 @@ enum gen_eu_urb_op {
GEN7_MSG_URB_READ_OWORD = 0x3,
GEN7_MSG_URB_ATOMIC_MOV = 0x4,
GEN7_MSG_URB_ATOMIC_INC = 0x5,
+ GEN75_MSG_URB_ATOMIC_ADD = 0x6,
GEN8_MSG_URB_SIMD8_WRITE = 0x7,
+ GEN8_MSG_URB_SIMD8_READ = 0x8,
};
enum gen_eu_pi_simd {
@@ -137,6 +139,7 @@ enum gen_eu_dp_op {
GEN75_MSG_DP_RC_MEMORY_FENCE = 0x7,
GEN75_MSG_DP_RC_MEDIA_BLOCK_WRITE = 0xa,
GEN75_MSG_DP_RC_RT_WRITE = 0xc,
+ GEN8_MSG_DP_RC_RT_READ = 0xd,
GEN75_MSG_DP_CC_OWORD_BLOCK_READ = 0x0,
GEN75_MSG_DP_CC_UNALIGNED_OWORD_BLOCK_READ = 0x1,
GEN75_MSG_DP_CC_OWORD_DUAL_BLOCK_READ = 0x2,
diff --git a/src/gallium/drivers/ilo/genhw/gen_mi.xml.h b/src/gallium/drivers/ilo/genhw/gen_mi.xml.h
index 5a0bb4f8d77..36f9618eb2d 100644
--- a/src/gallium/drivers/ilo/genhw/gen_mi.xml.h
+++ b/src/gallium/drivers/ilo/genhw/gen_mi.xml.h
@@ -84,6 +84,8 @@ enum gen_mi_alu_operand {
#define GEN7_MI_OPCODE_MI_PREDICATE (0xc << 23)
#define GEN7_MI_OPCODE_MI_URB_CLEAR (0x19 << 23)
#define GEN75_MI_OPCODE_MI_MATH (0x1a << 23)
+#define GEN8_MI_OPCODE_MI_SEMAPHORE_SIGNAL (0x1b << 23)
+#define GEN8_MI_OPCODE_MI_SEMAPHORE_WAIT (0x1c << 23)
#define GEN6_MI_OPCODE_MI_STORE_DATA_IMM (0x20 << 23)
#define GEN6_MI_OPCODE_MI_LOAD_REGISTER_IMM (0x22 << 23)
#define GEN6_MI_OPCODE_MI_STORE_REGISTER_MEM (0x24 << 23)
@@ -91,8 +93,11 @@ enum gen_mi_alu_operand {
#define GEN6_MI_OPCODE_MI_REPORT_PERF_COUNT (0x28 << 23)
#define GEN7_MI_OPCODE_MI_LOAD_REGISTER_MEM (0x29 << 23)
#define GEN75_MI_OPCODE_MI_LOAD_REGISTER_REG (0x2a << 23)
+#define GEN75_MI_OPCODE_MI_RS_STORE_DATA_IMM (0x2b << 23)
#define GEN75_MI_OPCODE_MI_LOAD_URB_MEM (0x2c << 23)
#define GEN75_MI_OPCODE_MI_STORE_URB_MEM (0x2d << 23)
+#define GEN8_MI_OPCODE_MI_COPY_MEM_MEM (0x2e << 23)
+#define GEN8_MI_OPCODE_MI_ATOMIC (0x2f << 23)
#define GEN6_MI_OPCODE_MI_BATCH_BUFFER_START (0x31 << 23)
#define GEN6_MI_LENGTH__MASK 0x0000003f
#define GEN6_MI_LENGTH__SHIFT 0
@@ -155,8 +160,41 @@ enum gen_mi_alu_operand {
#define GEN75_MI_MATH_DW_SRC2__MASK 0x000007ff
#define GEN75_MI_MATH_DW_SRC2__SHIFT 0
+#define GEN8_MI_SEMAPHORE_SIGNAL__SIZE 2
+#define GEN8_MI_SEMAPHORE_SIGNAL_DW0_POST_SYNC_OP (0x1 << 21)
+#define GEN8_MI_SEMAPHORE_SIGNAL_DW0_ENGINE__MASK 0x00038000
+#define GEN8_MI_SEMAPHORE_SIGNAL_DW0_ENGINE__SHIFT 15
+#define GEN8_MI_SEMAPHORE_SIGNAL_DW0_ENGINE_RCS (0x0 << 15)
+#define GEN8_MI_SEMAPHORE_SIGNAL_DW0_ENGINE_VCS0 (0x1 << 15)
+#define GEN8_MI_SEMAPHORE_SIGNAL_DW0_ENGINE_BCS (0x2 << 15)
+#define GEN8_MI_SEMAPHORE_SIGNAL_DW0_ENGINE_VECS (0x3 << 15)
+#define GEN8_MI_SEMAPHORE_SIGNAL_DW0_ENGINE_VCS1 (0x4 << 15)
+
+
+#define GEN8_MI_SEMAPHORE_WAIT__SIZE 4
+#define GEN8_MI_SEMAPHORE_WAIT_DW0_USE_GGTT (0x1 << 22)
+#define GEN8_MI_SEMAPHORE_WAIT_DW0_WAIT_MODE__MASK 0x00008000
+#define GEN8_MI_SEMAPHORE_WAIT_DW0_WAIT_MODE__SHIFT 15
+#define GEN8_MI_SEMAPHORE_WAIT_DW0_WAIT_MODE_SIGNAL (0x0 << 15)
+#define GEN8_MI_SEMAPHORE_WAIT_DW0_WAIT_MODE_POLL (0x1 << 15)
+#define GEN8_MI_SEMAPHORE_WAIT_DW0_OP__MASK 0x00007000
+#define GEN8_MI_SEMAPHORE_WAIT_DW0_OP__SHIFT 12
+#define GEN8_MI_SEMAPHORE_WAIT_DW0_OP_SAD_GREATER_THAN_SDD (0x0 << 12)
+#define GEN8_MI_SEMAPHORE_WAIT_DW0_OP_SAD_GREATER_THAN_OR_EQUAL_SDD (0x1 << 12)
+#define GEN8_MI_SEMAPHORE_WAIT_DW0_OP_SAD_LESS_THAN_SDD (0x2 << 12)
+#define GEN8_MI_SEMAPHORE_WAIT_DW0_OP_SAD_LESS_THAN_OR_EQUAL_SDD (0x3 << 12)
+#define GEN8_MI_SEMAPHORE_WAIT_DW0_OP_SAD_EQUAL_SDD (0x4 << 12)
+#define GEN8_MI_SEMAPHORE_WAIT_DW0_OP_SAD_NO_EQUAL_SDD (0x5 << 12)
+
+
+#define GEN8_MI_SEMAPHORE_WAIT_DW2_ADDR_ADDR__MASK 0xfffffffc
+#define GEN8_MI_SEMAPHORE_WAIT_DW2_ADDR_ADDR__SHIFT 2
+#define GEN8_MI_SEMAPHORE_WAIT_DW2_ADDR_ADDR__SHR 2
+
+
#define GEN6_MI_STORE_DATA_IMM__SIZE 6
#define GEN6_MI_STORE_DATA_IMM_DW0_USE_GGTT (0x1 << 22)
+#define GEN8_MI_STORE_DATA_IMM_DW0_STORE_QWORD (0x1 << 21)
#define GEN6_MI_STORE_DATA_IMM_DW2_ADDR__MASK 0xfffffffc
@@ -188,7 +226,17 @@ enum gen_mi_alu_operand {
#define GEN6_MI_STORE_REGISTER_MEM_DW2_ADDR__SHR 2
-#define GEN6_MI_FLUSH_DW__SIZE 4
+#define GEN6_MI_FLUSH_DW__SIZE 5
+#define GEN6_MI_FLUSH_DW_DW0_WRITE__MASK 0x0000c000
+#define GEN6_MI_FLUSH_DW_DW0_WRITE__SHIFT 14
+#define GEN6_MI_FLUSH_DW_DW0_WRITE_NONE (0x0 << 14)
+#define GEN6_MI_FLUSH_DW_DW0_WRITE_IMM (0x1 << 14)
+#define GEN6_MI_FLUSH_DW_DW0_WRITE_TIMESTAMP (0x3 << 14)
+
+#define GEN6_MI_FLUSH_DW_DW1_USE_GGTT (0x1 << 2)
+#define GEN6_MI_FLUSH_DW_DW1_ADDR__MASK 0xfffffff8
+#define GEN6_MI_FLUSH_DW_DW1_ADDR__SHIFT 3
+#define GEN6_MI_FLUSH_DW_DW1_ADDR__SHR 3
@@ -225,6 +273,17 @@ enum gen_mi_alu_operand {
#define GEN75_MI_LOAD_REGISTER_REG_DW2_DST_REG__SHIFT 2
#define GEN75_MI_LOAD_REGISTER_REG_DW2_DST_REG__SHR 2
+#define GEN75_MI_RS_STORE_DATA_IMM__SIZE 6
+#define GEN75_MI_RS_STORE_DATA_IMM_DW0_USE_GGTT (0x1 << 22)
+
+
+#define GEN75_MI_RS_STORE_DATA_IMM_DW2_ADDR__MASK 0xfffffffc
+#define GEN75_MI_RS_STORE_DATA_IMM_DW2_ADDR__SHIFT 2
+#define GEN75_MI_RS_STORE_DATA_IMM_DW2_ADDR__SHR 2
+
+
+
+
#define GEN75_MI_LOAD_URB_MEM__SIZE 4
#define GEN75_MI_LOAD_URB_MEM_DW1_ADDR__MASK 0x00007ffc
@@ -247,12 +306,47 @@ enum gen_mi_alu_operand {
#define GEN75_MI_STORE_URB_MEM_DW2_ADDR__SHR 6
+#define GEN8_MI_COPY_MEM_MEM__SIZE 5
+#define GEN8_MI_COPY_MEM_MEM_DW0_USE_GGTT_SRC (0x1 << 22)
+#define GEN8_MI_COPY_MEM_MEM_DW0_USE_GGTT_DST (0x1 << 21)
+
+#define GEN8_MI_COPY_MEM_MEM_DW1_DST_ADDR__MASK 0xfffffffc
+#define GEN8_MI_COPY_MEM_MEM_DW1_DST_ADDR__SHIFT 2
+#define GEN8_MI_COPY_MEM_MEM_DW1_DST_ADDR__SHR 2
+
+
+#define GEN8_MI_COPY_MEM_MEM_DW3_SRC_ADDR__MASK 0xfffffffc
+#define GEN8_MI_COPY_MEM_MEM_DW3_SRC_ADDR__SHIFT 2
+#define GEN8_MI_COPY_MEM_MEM_DW3_SRC_ADDR__SHR 2
+
+
+#define GEN8_MI_ATOMIC__SIZE 11
+#define GEN8_MI_ATOMIC_DW0_USE_GGTT (0x1 << 22)
+#define GEN8_MI_ATOMIC_DW0_POST_SYNC_OP (0x1 << 21)
+#define GEN8_MI_ATOMIC_DW0_SIZE__MASK 0x00180000
+#define GEN8_MI_ATOMIC_DW0_SIZE__SHIFT 19
+#define GEN8_MI_ATOMIC_DW0_SIZE_DWORD (0x0 << 19)
+#define GEN8_MI_ATOMIC_DW0_SIZE_QWORD (0x1 << 19)
+#define GEN8_MI_ATOMIC_DW0_SIZE_OWORD (0x2 << 19)
+#define GEN8_MI_ATOMIC_DW0_INLINE_DATA (0x1 << 18)
+#define GEN8_MI_ATOMIC_DW0_CS_STALL (0x1 << 17)
+#define GEN8_MI_ATOMIC_DW0_RETURN_DATA_CONTROL (0x1 << 16)
+#define GEN8_MI_ATOMIC_DW0_OP__MASK 0x0000ff00
+#define GEN8_MI_ATOMIC_DW0_OP__SHIFT 8
+
+#define GEN8_MI_ATOMIC_DW1_ADDR__MASK 0xfffffffc
+#define GEN8_MI_ATOMIC_DW1_ADDR__SHIFT 2
+#define GEN8_MI_ATOMIC_DW1_ADDR__SHR 2
+
+
+
#define GEN6_MI_BATCH_BUFFER_START__SIZE 3
#define GEN75_MI_BATCH_BUFFER_START_DW0_SECOND_LEVEL (0x1 << 22)
#define GEN75_MI_BATCH_BUFFER_START_DW0_ADD_OFFSET_ENABLE (0x1 << 16)
#define GEN75_MI_BATCH_BUFFER_START_DW0_PREDICATION_ENABLE (0x1 << 15)
#define GEN75_MI_BATCH_BUFFER_START_DW0_NON_PRIVILEGED (0x1 << 13)
#define GEN6_MI_BATCH_BUFFER_START_DW0_CLEAR_COMMAND_BUFFER (0x1 << 11)
+#define GEN75_MI_BATCH_BUFFER_START_DW0_RS_ENABLE (0x1 << 10)
#define GEN6_MI_BATCH_BUFFER_START_DW0_USE_PPGTT (0x1 << 8)
#define GEN6_MI_BATCH_BUFFER_START_DW1_ADDR__MASK 0xfffffffc
diff --git a/src/gallium/drivers/ilo/genhw/gen_regs.xml.h b/src/gallium/drivers/ilo/genhw/gen_regs.xml.h
index c51e4f78bc0..54ec13eaafa 100644
--- a/src/gallium/drivers/ilo/genhw/gen_regs.xml.h
+++ b/src/gallium/drivers/ilo/genhw/gen_regs.xml.h
@@ -37,6 +37,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define GEN6_REG__SIZE 0x400000
#define GEN6_REG_NOPID 0x2094
+
+#define GEN6_REG_SO_PRIM_STORAGE_NEEDED 0x2280
+
+#define GEN6_REG_SO_NUM_PRIMS_WRITTEN 0x2288
+
+
+#define GEN7_REG_TS_GPGPU_THREADS_DISPATCHED 0x2290
+
#define GEN7_REG_HS_INVOCATION_COUNT 0x2300
#define GEN7_REG_DS_INVOCATION_COUNT 0x2308
@@ -95,10 +103,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define GEN75_REG_CS_GPR__ESIZE 0x8
#define GEN75_REG_CS_GPR__LEN 0x10
+#define GEN7_REG_GPGPU_DISPATCHDIMX 0x2500
-#define GEN6_REG_SO_PRIM_STORAGE_NEEDED 0x2280
+#define GEN7_REG_GPGPU_DISPATCHDIMY 0x2504
-#define GEN6_REG_SO_NUM_PRIMS_WRITTEN 0x2288
+#define GEN7_REG_GPGPU_DISPATCHDIMZ 0x2508
#define GEN7_REG_SO_NUM_PRIMS_WRITTEN(i0) (0x5200 + 0x8*(i0))
@@ -118,8 +127,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define GEN7_REG_CACHE_MODE_0_HIZ_RAW_STALL_OPT_DISABLE (0x1 << 2)
#define GEN7_REG_CACHE_MODE_1 0x7004
-#define GEN8_REG_CACHE_MODE_1_HIZ_NP_EARLY_Z_FAILS_DISABLE (0x1 << 13)
-#define GEN8_REG_CACHE_MODE_1_HIZ_NP_PMA_FIX_ENABLE (0x1 << 11)
+#define GEN8_REG_CACHE_MODE_1_NP_EARLY_Z_FAILS_DISABLE (0x1 << 13)
+#define GEN8_REG_CACHE_MODE_1_NP_PMA_FIX_ENABLE (0x1 << 11)
#define GEN8_REG_L3CNTLREG 0x7034
diff --git a/src/gallium/drivers/ilo/genhw/gen_render.xml.h b/src/gallium/drivers/ilo/genhw/gen_render.xml.h
index 2e86ba96ae2..43d271d838a 100644
--- a/src/gallium/drivers/ilo/genhw/gen_render.xml.h
+++ b/src/gallium/drivers/ilo/genhw/gen_render.xml.h
@@ -102,6 +102,16 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define GEN7_RENDER_OPCODE_3DSTATE_URB_HS (0x31 << 16)
#define GEN7_RENDER_OPCODE_3DSTATE_URB_DS (0x32 << 16)
#define GEN7_RENDER_OPCODE_3DSTATE_URB_GS (0x33 << 16)
+#define GEN75_RENDER_OPCODE_3DSTATE_GATHER_CONSTANT_VS (0x34 << 16)
+#define GEN75_RENDER_OPCODE_3DSTATE_GATHER_CONSTANT_GS (0x35 << 16)
+#define GEN75_RENDER_OPCODE_3DSTATE_GATHER_CONSTANT_HS (0x36 << 16)
+#define GEN75_RENDER_OPCODE_3DSTATE_GATHER_CONSTANT_DS (0x37 << 16)
+#define GEN75_RENDER_OPCODE_3DSTATE_GATHER_CONSTANT_PS (0x38 << 16)
+#define GEN75_RENDER_OPCODE_3DSTATE_BINDING_TABLE_EDIT_VS (0x43 << 16)
+#define GEN75_RENDER_OPCODE_3DSTATE_BINDING_TABLE_EDIT_GS (0x44 << 16)
+#define GEN75_RENDER_OPCODE_3DSTATE_BINDING_TABLE_EDIT_HS (0x45 << 16)
+#define GEN75_RENDER_OPCODE_3DSTATE_BINDING_TABLE_EDIT_DS (0x46 << 16)
+#define GEN75_RENDER_OPCODE_3DSTATE_BINDING_TABLE_EDIT_PS (0x47 << 16)
#define GEN8_RENDER_OPCODE_3DSTATE_VF_INSTANCING (0x49 << 16)
#define GEN8_RENDER_OPCODE_3DSTATE_VF_SGVS (0x4a << 16)
#define GEN8_RENDER_OPCODE_3DSTATE_VF_TOPOLOGY (0x4b << 16)
@@ -130,6 +140,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS (0x116 << 16)
#define GEN7_RENDER_OPCODE_3DSTATE_SO_DECL_LIST (0x117 << 16)
#define GEN7_RENDER_OPCODE_3DSTATE_SO_BUFFER (0x118 << 16)
+#define GEN75_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POOL_ALLOC (0x119 << 16)
+#define GEN75_RENDER_OPCODE_3DSTATE_GATHER_POOL_ALLOC (0x11a << 16)
#define GEN8_RENDER_OPCODE_3DSTATE_SAMPLE_PATTERN (0x11c << 16)
#define GEN6_RENDER_OPCODE_PIPE_CONTROL (0x200 << 16)
#define GEN6_RENDER_OPCODE_3DPRIMITIVE (0x300 << 16)
@@ -178,6 +190,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define GEN8_SBA_MOCS__MASK 0x000007f0
#define GEN8_SBA_MOCS__SHIFT 4
#define GEN6_SBA_ADDR_MODIFIED (0x1 << 0)
+#define GEN8_SBA_SIZE__MASK 0xfffff000
+#define GEN8_SBA_SIZE__SHIFT 12
+#define GEN8_SBA_SIZE__SHR 12
+#define GEN8_SBA_SIZE_MODIFIED (0x1 << 0)
#define GEN6_BINDING_TABLE_ADDR__MASK 0x0000ffe0
#define GEN6_BINDING_TABLE_ADDR__SHIFT 5
#define GEN6_BINDING_TABLE_ADDR__SHR 5
diff --git a/src/gallium/drivers/ilo/genhw/gen_render_3d.xml.h b/src/gallium/drivers/ilo/genhw/gen_render_3d.xml.h
index 52173fe5d07..c79a4f3a830 100644
--- a/src/gallium/drivers/ilo/genhw/gen_render_3d.xml.h
+++ b/src/gallium/drivers/ilo/genhw/gen_render_3d.xml.h
@@ -168,7 +168,6 @@ enum gen_sample_count {
GEN8_NUMSAMPLES_2 = 0x1,
GEN6_NUMSAMPLES_4 = 0x2,
GEN7_NUMSAMPLES_8 = 0x3,
- GEN8_NUMSAMPLES_16 = 0x4,
};
enum gen_inputattr_select {
@@ -297,11 +296,58 @@ enum gen_msrast_mode {
#define GEN7_URB_DW1_OFFSET__MASK 0x3e000000
#define GEN7_URB_DW1_OFFSET__SHIFT 25
+#define GEN75_URB_DW1_OFFSET__MASK 0x7e000000
+#define GEN75_URB_DW1_OFFSET__SHIFT 25
+#define GEN8_URB_DW1_OFFSET__MASK 0xfe000000
+#define GEN8_URB_DW1_OFFSET__SHIFT 25
#define GEN7_URB_DW1_ENTRY_SIZE__MASK 0x01ff0000
#define GEN7_URB_DW1_ENTRY_SIZE__SHIFT 16
#define GEN7_URB_DW1_ENTRY_COUNT__MASK 0x0000ffff
#define GEN7_URB_DW1_ENTRY_COUNT__SHIFT 0
+#define GEN75_3DSTATE_GATHER_CONSTANT_ANY__SIZE 130
+
+
+#define GEN75_GATHER_CONST_DW1_BT_VALID__MASK 0xffff0000
+#define GEN75_GATHER_CONST_DW1_BT_VALID__SHIFT 16
+#define GEN75_GATHER_CONST_DW1_BT_BLOCK__MASK 0x0000f000
+#define GEN75_GATHER_CONST_DW1_BT_BLOCK__SHIFT 12
+
+#define GEN75_GATHER_CONST_DW2_GATHER_BUFFER_OFFSET__MASK 0x007fffc0
+#define GEN75_GATHER_CONST_DW2_GATHER_BUFFER_OFFSET__SHIFT 6
+#define GEN75_GATHER_CONST_DW2_GATHER_BUFFER_OFFSET__SHR 6
+#define GEN8_GATHER_CONST_DW2_DX9_STALL (0x1 << 5)
+#define GEN75_GATHER_CONST_DW2_DX9_ENABLE (0x1 << 4)
+
+#define GEN75_GATHER_CONST_DW_ENTRY_HIGH__MASK 0xffff0000
+#define GEN75_GATHER_CONST_DW_ENTRY_HIGH__SHIFT 16
+#define GEN75_GATHER_CONST_DW_ENTRY_OFFSET__MASK 0x0000ff00
+#define GEN75_GATHER_CONST_DW_ENTRY_OFFSET__SHIFT 8
+#define GEN75_GATHER_CONST_DW_ENTRY_CHANNEL_MASK__MASK 0x000000f0
+#define GEN75_GATHER_CONST_DW_ENTRY_CHANNEL_MASK__SHIFT 4
+#define GEN75_GATHER_CONST_DW_ENTRY_BT_INDEX__MASK 0x0000001f
+#define GEN75_GATHER_CONST_DW_ENTRY_BT_INDEX__SHIFT 0
+
+#define GEN75_3DSTATE_BINDING_TABLE_EDIT_ANY__SIZE 258
+
+
+#define GEN75_BT_EDIT_DW1_BT_BLOCK_CLEAR__MASK 0xffff0000
+#define GEN75_BT_EDIT_DW1_BT_BLOCK_CLEAR__SHIFT 16
+#define GEN75_BT_EDIT_DW1_TARGET__MASK 0x00000003
+#define GEN75_BT_EDIT_DW1_TARGET__SHIFT 0
+#define GEN75_BT_EDIT_DW1_TARGET_CORE0 0x1
+#define GEN75_BT_EDIT_DW1_TARGET_CORE1 0x2
+#define GEN75_BT_EDIT_DW1_TARGET_ALL 0x3
+
+#define GEN75_BT_EDIT_DW_ENTRY_BT_INDEX__MASK 0x00ff0000
+#define GEN75_BT_EDIT_DW_ENTRY_BT_INDEX__SHIFT 16
+#define GEN75_BT_EDIT_DW_ENTRY_SURFACE_STATE_ADDR__MASK 0x0000ffff
+#define GEN75_BT_EDIT_DW_ENTRY_SURFACE_STATE_ADDR__SHIFT 0
+#define GEN75_BT_EDIT_DW_ENTRY_SURFACE_STATE_ADDR__SHR 5
+#define GEN8_BT_EDIT_DW_ENTRY_SURFACE_STATE_ADDR__MASK 0x0000ffff
+#define GEN8_BT_EDIT_DW_ENTRY_SURFACE_STATE_ADDR__SHIFT 0
+#define GEN8_BT_EDIT_DW_ENTRY_SURFACE_STATE_ADDR__SHR 6
+
#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_ANY__SIZE 2
@@ -315,6 +361,48 @@ enum gen_msrast_mode {
#define GEN75_PCB_ALLOC_DW1_SIZE__MASK 0x0000003f
#define GEN75_PCB_ALLOC_DW1_SIZE__SHIFT 0
+#define GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC__SIZE 3
+
+
+#define GEN75_BT_POOL_ALLOC_DW1_ADDR__MASK 0xfffff000
+#define GEN75_BT_POOL_ALLOC_DW1_ADDR__SHIFT 12
+#define GEN75_BT_POOL_ALLOC_DW1_ADDR__SHR 12
+#define GEN75_BT_POOL_ALLOC_DW1_ENABLE (0x1 << 11)
+#define GEN75_BT_POOL_ALLOC_DW1_MOCS__MASK 0x00000780
+#define GEN75_BT_POOL_ALLOC_DW1_MOCS__SHIFT 7
+#define GEN8_BT_POOL_ALLOC_DW1_MOCS__MASK 0x0000007f
+#define GEN8_BT_POOL_ALLOC_DW1_MOCS__SHIFT 0
+
+#define GEN75_BT_POOL_ALLOC_DW2_END_ADDR__MASK 0xfffff000
+#define GEN75_BT_POOL_ALLOC_DW2_END_ADDR__SHIFT 12
+#define GEN75_BT_POOL_ALLOC_DW2_END_ADDR__SHR 12
+
+
+#define GEN8_BT_POOL_ALLOC_DW3_SIZE__MASK 0xfffff000
+#define GEN8_BT_POOL_ALLOC_DW3_SIZE__SHIFT 12
+#define GEN8_BT_POOL_ALLOC_DW3_SIZE__SHR 12
+
+#define GEN75_3DSTATE_GATHER_POOL_ALLOC__SIZE 3
+
+
+#define GEN75_GATHER_POOL_ALLOC_DW1_ADDR__MASK 0xfffff000
+#define GEN75_GATHER_POOL_ALLOC_DW1_ADDR__SHIFT 12
+#define GEN75_GATHER_POOL_ALLOC_DW1_ADDR__SHR 12
+#define GEN75_GATHER_POOL_ALLOC_DW1_ENABLE (0x1 << 11)
+#define GEN75_GATHER_POOL_ALLOC_DW1_MOCS__MASK 0x0000000f
+#define GEN75_GATHER_POOL_ALLOC_DW1_MOCS__SHIFT 0
+#define GEN8_GATHER_POOL_ALLOC_DW1_MOCS__MASK 0x0000007f
+#define GEN8_GATHER_POOL_ALLOC_DW1_MOCS__SHIFT 0
+
+#define GEN75_GATHER_POOL_ALLOC_DW2_END_ADDR__MASK 0xfffff000
+#define GEN75_GATHER_POOL_ALLOC_DW2_END_ADDR__SHIFT 12
+#define GEN75_GATHER_POOL_ALLOC_DW2_END_ADDR__SHR 12
+
+
+#define GEN8_GATHER_POOL_ALLOC_DW3_SIZE__MASK 0xfffff000
+#define GEN8_GATHER_POOL_ALLOC_DW3_SIZE__SHIFT 12
+#define GEN8_GATHER_POOL_ALLOC_DW3_SIZE__SHR 12
+
#define GEN6_3DSTATE_VERTEX_BUFFERS__SIZE 133
@@ -402,15 +490,15 @@ enum gen_msrast_mode {
#define GEN8_SGVS_DW1_IID_ENABLE (0x1 << 31)
-#define GEN8_SGVS_DW1_IID_VE_COMP__MASK 0x60000000
-#define GEN8_SGVS_DW1_IID_VE_COMP__SHIFT 29
-#define GEN8_SGVS_DW1_IID_VE_INDEX__MASK 0x003f0000
-#define GEN8_SGVS_DW1_IID_VE_INDEX__SHIFT 16
+#define GEN8_SGVS_DW1_IID_COMP__MASK 0x60000000
+#define GEN8_SGVS_DW1_IID_COMP__SHIFT 29
+#define GEN8_SGVS_DW1_IID_OFFSET__MASK 0x003f0000
+#define GEN8_SGVS_DW1_IID_OFFSET__SHIFT 16
#define GEN8_SGVS_DW1_VID_ENABLE (0x1 << 15)
-#define GEN8_SGVS_DW1_VID_VE_COMP__MASK 0x00006000
-#define GEN8_SGVS_DW1_VID_VE_COMP__SHIFT 13
-#define GEN8_SGVS_DW1_VID_VE_INDEX__MASK 0x0000003f
-#define GEN8_SGVS_DW1_VID_VE_INDEX__SHIFT 0
+#define GEN8_SGVS_DW1_VID_COMP__MASK 0x00006000
+#define GEN8_SGVS_DW1_VID_COMP__SHIFT 13
+#define GEN8_SGVS_DW1_VID_OFFSET__MASK 0x0000003f
+#define GEN8_SGVS_DW1_VID_OFFSET__SHIFT 0
#define GEN8_3DSTATE_VF_TOPOLOGY__SIZE 2
@@ -464,6 +552,10 @@ enum gen_msrast_mode {
#define GEN7_3DSTATE_POINTERS_ANY__SIZE 2
+#define GEN7_PTR_DW1_ADDR__MASK 0xffffffe0
+#define GEN7_PTR_DW1_ADDR__SHIFT 5
+#define GEN7_PTR_DW1_ADDR__SHR 5
+#define GEN8_PTR_DW1_CHANGED (0x1 << 0)
#define GEN6_3DSTATE_VS__SIZE 9
@@ -513,12 +605,14 @@ enum gen_msrast_mode {
#define GEN8_VS_DW7_CACHE_DISABLE (0x1 << 1)
#define GEN8_VS_DW7_VS_ENABLE (0x1 << 0)
-#define GEN8_VS_DW8_URB_WRITE_OFFSET__MASK 0x03e00000
-#define GEN8_VS_DW8_URB_WRITE_OFFSET__SHIFT 21
-#define GEN8_VS_DW8_URB_WRITE_LEN__MASK 0x001f0000
-#define GEN8_VS_DW8_URB_WRITE_LEN__SHIFT 16
+#define GEN8_VS_DW8_VUE_OUT_READ_OFFSET__MASK 0x07e00000
+#define GEN8_VS_DW8_VUE_OUT_READ_OFFSET__SHIFT 21
+#define GEN8_VS_DW8_VUE_OUT_LEN__MASK 0x001f0000
+#define GEN8_VS_DW8_VUE_OUT_LEN__SHIFT 16
#define GEN8_VS_DW8_UCP_CLIP_ENABLES__MASK 0x0000ff00
#define GEN8_VS_DW8_UCP_CLIP_ENABLES__SHIFT 8
+#define GEN8_VS_DW8_UCP_CULL_ENABLES__MASK 0x000000ff
+#define GEN8_VS_DW8_UCP_CULL_ENABLES__SHIFT 0
#define GEN7_3DSTATE_HS__SIZE 9
@@ -558,11 +652,11 @@ enum gen_msrast_mode {
-#define GEN8_HS_DW1_DISPATCH_MAX_THREADS__MASK 0x000000ff
-#define GEN8_HS_DW1_DISPATCH_MAX_THREADS__SHIFT 0
#define GEN8_HS_DW2_HS_ENABLE (0x1 << 31)
#define GEN8_HS_DW2_STATISTICS (0x1 << 29)
+#define GEN8_HS_DW2_MAX_THREADS__MASK 0x0001ff00
+#define GEN8_HS_DW2_MAX_THREADS__SHIFT 8
#define GEN8_HS_DW2_INSTANCE_COUNT__MASK 0x0000000f
#define GEN8_HS_DW2_INSTANCE_COUNT__SHIFT 0
@@ -584,9 +678,6 @@ enum gen_msrast_mode {
#define GEN8_HS_DW7_URB_READ_OFFSET__MASK 0x000003f0
#define GEN8_HS_DW7_URB_READ_OFFSET__SHIFT 4
-#define GEN8_HS_DW8_URB_SEMAPHORE_ADDR__MASK 0x00001fff
-#define GEN8_HS_DW8_URB_SEMAPHORE_ADDR__SHIFT 0
-#define GEN8_HS_DW8_URB_SEMAPHORE_ADDR__SHR 6
#define GEN7_3DSTATE_TE__SIZE 4
@@ -660,16 +751,19 @@ enum gen_msrast_mode {
#define GEN8_DS_DW7_MAX_THREADS__MASK 0x3fe00000
#define GEN8_DS_DW7_MAX_THREADS__SHIFT 21
#define GEN8_DS_DW7_STATISTICS (0x1 << 10)
+#define GEN8_DS_DW7_SIMD8_ENABLE (0x1 << 3)
#define GEN8_DS_DW7_COMPUTE_W (0x1 << 2)
#define GEN8_DS_DW7_CACHE_DISABLE (0x1 << 1)
#define GEN8_DS_DW7_DS_ENABLE (0x1 << 0)
-#define GEN8_DS_DW8_URB_WRITE_OFFSET__MASK 0x03e00000
-#define GEN8_DS_DW8_URB_WRITE_OFFSET__SHIFT 21
-#define GEN8_DS_DW8_URB_WRITE_LEN__MASK 0x001f0000
-#define GEN8_DS_DW8_URB_WRITE_LEN__SHIFT 16
+#define GEN8_DS_DW8_VUE_OUT_READ_OFFSET__MASK 0x07e00000
+#define GEN8_DS_DW8_VUE_OUT_READ_OFFSET__SHIFT 21
+#define GEN8_DS_DW8_VUE_OUT_LEN__MASK 0x001f0000
+#define GEN8_DS_DW8_VUE_OUT_LEN__SHIFT 16
#define GEN8_DS_DW8_UCP_CLIP_ENABLES__MASK 0x0000ff00
#define GEN8_DS_DW8_UCP_CLIP_ENABLES__SHIFT 8
+#define GEN8_DS_DW8_UCP_CULL_ENABLES__MASK 0x000000ff
+#define GEN8_DS_DW8_UCP_CULL_ENABLES__SHIFT 0
@@ -771,7 +865,7 @@ enum gen_msrast_mode {
#define GEN8_GS_DW1_KERNEL_ADDR__SHR 6
-#define GEN8_GS_DW3_EXPECTED_VERTEX_COUNT__MASK 0x0000007f
+#define GEN8_GS_DW3_EXPECTED_VERTEX_COUNT__MASK 0x0000003f
#define GEN8_GS_DW3_EXPECTED_VERTEX_COUNT__SHIFT 0
@@ -815,18 +909,20 @@ enum gen_msrast_mode {
#define GEN8_GS_DW8_GSCTRL__SHIFT 31
#define GEN8_GS_DW8_GSCTRL_CUT (0x0 << 31)
#define GEN8_GS_DW8_GSCTRL_SID (0x1 << 31)
-#define GEN8_GS_DW8_URB_SEMAPHORE_ADDR__MASK 0x00001fff
-#define GEN8_GS_DW8_URB_SEMAPHORE_ADDR__SHIFT 0
-#define GEN8_GS_DW8_URB_SEMAPHORE_ADDR__SHR 6
-#define GEN9_GS_DW8_MAX_THREADS__MASK 0x00001fff
+#define GEN8_GS_DW8_STATIC_OUTPUT (0x1 << 30)
+#define GEN8_GS_DW8_STATIC_OUTPUT_VERTEX_COUNT__MASK 0x07ff0000
+#define GEN8_GS_DW8_STATIC_OUTPUT_VERTEX_COUNT__SHIFT 16
+#define GEN9_GS_DW8_MAX_THREADS__MASK 0x000001ff
#define GEN9_GS_DW8_MAX_THREADS__SHIFT 0
-#define GEN8_GS_DW9_URB_WRITE_OFFSET__MASK 0x03e00000
-#define GEN8_GS_DW9_URB_WRITE_OFFSET__SHIFT 21
-#define GEN8_GS_DW9_URB_WRITE_LEN__MASK 0x001f0000
-#define GEN8_GS_DW9_URB_WRITE_LEN__SHIFT 16
+#define GEN8_GS_DW9_VUE_OUT_READ_OFFSET__MASK 0x07e00000
+#define GEN8_GS_DW9_VUE_OUT_READ_OFFSET__SHIFT 21
+#define GEN8_GS_DW9_VUE_OUT_LEN__MASK 0x001f0000
+#define GEN8_GS_DW9_VUE_OUT_LEN__SHIFT 16
#define GEN8_GS_DW9_UCP_CLIP_ENABLES__MASK 0x0000ff00
#define GEN8_GS_DW9_UCP_CLIP_ENABLES__SHIFT 8
+#define GEN8_GS_DW9_UCP_CULL_ENABLES__MASK 0x000000ff
+#define GEN8_GS_DW9_UCP_CULL_ENABLES__SHIFT 0
#define GEN7_3DSTATE_STREAMOUT__SIZE 5
@@ -838,6 +934,11 @@ enum gen_msrast_mode {
#define GEN7_SO_DW1_REORDER_MODE__MASK 0x04000000
#define GEN7_SO_DW1_REORDER_MODE__SHIFT 26
#define GEN7_SO_DW1_STATISTICS (0x1 << 25)
+#define GEN8_SO_DW1_FORCE_RENDERING__MASK 0x01800000
+#define GEN8_SO_DW1_FORCE_RENDERING__SHIFT 23
+#define GEN8_SO_DW1_FORCE_RENDERING_NORMAL (0x0 << 23)
+#define GEN8_SO_DW1_FORCE_RENDERING_OFF (0x2 << 23)
+#define GEN8_SO_DW1_FORCE_RENDERING_ON (0x3 << 23)
#define GEN7_SO_DW1_BUFFER_ENABLES__MASK 0x00000f00
#define GEN7_SO_DW1_BUFFER_ENABLES__SHIFT 8
@@ -928,9 +1029,9 @@ enum gen_msrast_mode {
-#define GEN8_SO_BUF_DW5_OFFSET_ADDR__MASK 0xfffffffc
-#define GEN8_SO_BUF_DW5_OFFSET_ADDR__SHIFT 2
-#define GEN8_SO_BUF_DW5_OFFSET_ADDR__SHR 2
+#define GEN8_SO_BUF_DW5_OFFSET_ADDR_ADDR__MASK 0xfffffffc
+#define GEN8_SO_BUF_DW5_OFFSET_ADDR_ADDR__SHIFT 2
+#define GEN8_SO_BUF_DW5_OFFSET_ADDR_ADDR__SHR 2
@@ -939,6 +1040,7 @@ enum gen_msrast_mode {
#define GEN7_CLIP_DW1_FRONT_WINDING__MASK 0x00100000
#define GEN7_CLIP_DW1_FRONT_WINDING__SHIFT 20
+#define GEN8_CLIP_DW1_FORCE_UCP_CULL_ENABLES (0x1 << 20)
#define GEN7_CLIP_DW1_SUBPIXEL__MASK 0x00080000
#define GEN7_CLIP_DW1_SUBPIXEL__SHIFT 19
#define GEN7_CLIP_DW1_SUBPIXEL_8BITS (0x0 << 19)
@@ -946,6 +1048,8 @@ enum gen_msrast_mode {
#define GEN7_CLIP_DW1_EARLY_CULL_ENABLE (0x1 << 18)
#define GEN7_CLIP_DW1_CULL_MODE__MASK 0x00030000
#define GEN7_CLIP_DW1_CULL_MODE__SHIFT 16
+#define GEN8_CLIP_DW1_FORCE_UCP_CLIP_ENABLES (0x1 << 17)
+#define GEN8_CLIP_DW1_FORCE_CLIP_MODE (0x1 << 16)
#define GEN6_CLIP_DW1_STATISTICS (0x1 << 10)
#define GEN6_CLIP_DW1_UCP_CULL_ENABLES__MASK 0x000000ff
#define GEN6_CLIP_DW1_UCP_CULL_ENABLES__SHIFT 0
@@ -1026,6 +1130,7 @@ enum gen_msrast_mode {
#define GEN7_SF_DW3_TRIFAN_PROVOKE__MASK 0x06000000
#define GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT 25
#define GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE (0x1 << 14)
+#define GEN8_SF_DW3_SMOOTH_POINT_ENABLE (0x1 << 13)
#define GEN7_SF_DW3_SUBPIXEL__MASK 0x00001000
#define GEN7_SF_DW3_SUBPIXEL__SHIFT 12
#define GEN7_SF_DW3_SUBPIXEL_8BITS (0x0 << 12)
@@ -1037,8 +1142,8 @@ enum gen_msrast_mode {
#define GEN7_3DSTATE_SBE_DW1__SIZE 13
-#define GEN8_SBE_DW1_USE_URB_READ_LEN (0x1 << 29)
-#define GEN8_SBE_DW1_USE_URB_READ_OFFSET (0x1 << 28)
+#define GEN8_SBE_DW1_FORCE_URB_READ_LEN (0x1 << 29)
+#define GEN8_SBE_DW1_FORCE_URB_READ_OFFSET (0x1 << 28)
#define GEN7_SBE_DW1_ATTR_SWIZZLE__MASK 0x10000000
#define GEN7_SBE_DW1_ATTR_SWIZZLE__SHIFT 28
#define GEN7_SBE_DW1_ATTR_SWIZZLE_0_15 (0x0 << 28)
@@ -1050,21 +1155,28 @@ enum gen_msrast_mode {
#define GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD__SHIFT 20
#define GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_UPPERLEFT (0x0 << 20)
#define GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_LOWERLEFT (0x1 << 20)
+#define GEN8_SBE_DW1_PID_OVERRIDE_W (0x1 << 19)
+#define GEN8_SBE_DW1_PID_OVERRIDE_Z (0x1 << 18)
+#define GEN8_SBE_DW1_PID_OVERRIDE_Y (0x1 << 17)
+#define GEN8_SBE_DW1_PID_OVERRIDE_X (0x1 << 16)
#define GEN7_SBE_DW1_URB_READ_LEN__MASK 0x0000f800
#define GEN7_SBE_DW1_URB_READ_LEN__SHIFT 11
#define GEN7_SBE_DW1_URB_READ_OFFSET__MASK 0x000003f0
#define GEN7_SBE_DW1_URB_READ_OFFSET__SHIFT 4
#define GEN8_SBE_DW1_URB_READ_OFFSET__MASK 0x000007e0
#define GEN8_SBE_DW1_URB_READ_OFFSET__SHIFT 5
+#define GEN8_SBE_DW1_PID_OVERRIDE_ATTR__MASK 0x0000001f
+#define GEN8_SBE_DW1_PID_OVERRIDE_ATTR__SHIFT 0
#define GEN8_3DSTATE_SBE_SWIZ_DW1_DW8__SIZE 8
#define GEN8_SBE_SWIZ_HIGH__MASK 0xffff0000
#define GEN8_SBE_SWIZ_HIGH__SHIFT 16
-#define GEN8_SBE_SWIZ_OVERRIDE_W (0x1 << 15)
-#define GEN8_SBE_SWIZ_OVERRIDE_Z (0x1 << 14)
-#define GEN8_SBE_SWIZ_OVERRIDE_Y (0x1 << 13)
-#define GEN8_SBE_SWIZ_OVERRIDE_X (0x1 << 12)
+#define GEN8_SBE_SWIZ_CONST_OVERRIDE_W (0x1 << 15)
+#define GEN8_SBE_SWIZ_CONST_OVERRIDE_Z (0x1 << 14)
+#define GEN8_SBE_SWIZ_CONST_OVERRIDE_Y (0x1 << 13)
+#define GEN8_SBE_SWIZ_CONST_OVERRIDE_X (0x1 << 12)
+#define GEN8_SBE_SWIZ_SWIZZLE_CONTROL (0x1 << 11)
#define GEN8_SBE_SWIZ_CONST__MASK 0x00000600
#define GEN8_SBE_SWIZ_CONST__SHIFT 9
#define GEN8_SBE_SWIZ_CONST_0000 (0x0 << 9)
@@ -1126,12 +1238,28 @@ enum gen_msrast_mode {
#define GEN9_RASTER_DW1_Z_TEST_FAR_ENABLE (0x1 << 26)
+#define GEN8_RASTER_DW1_API__MASK 0x00c00000
+#define GEN8_RASTER_DW1_API__SHIFT 22
+#define GEN8_RASTER_DW1_API_DX9_OGL (0x0 << 22)
+#define GEN8_RASTER_DW1_API_DX10 (0x1 << 22)
+#define GEN8_RASTER_DW1_API_DX10_1 (0x2 << 22)
#define GEN8_RASTER_DW1_FRONT_WINDING__MASK 0x00200000
#define GEN8_RASTER_DW1_FRONT_WINDING__SHIFT 21
+#define GEN8_RASTER_DW1_FORCED_SAMPLE_COUNT__MASK 0x001c0000
+#define GEN8_RASTER_DW1_FORCED_SAMPLE_COUNT__SHIFT 18
+#define GEN8_RASTER_DW1_FORCED_SAMPLE_COUNT_NUMRASTSAMPLES_0 (0x0 << 18)
+#define GEN8_RASTER_DW1_FORCED_SAMPLE_COUNT_NUMRASTSAMPLES_1 (0x1 << 18)
+#define GEN8_RASTER_DW1_FORCED_SAMPLE_COUNT_NUMRASTSAMPLES_2 (0x2 << 18)
+#define GEN8_RASTER_DW1_FORCED_SAMPLE_COUNT_NUMRASTSAMPLES_4 (0x3 << 18)
+#define GEN8_RASTER_DW1_FORCED_SAMPLE_COUNT_NUMRASTSAMPLES_8 (0x4 << 18)
+#define GEN8_RASTER_DW1_FORCED_SAMPLE_COUNT_NUMRASTSAMPLES_16 (0x5 << 18)
#define GEN8_RASTER_DW1_CULL_MODE__MASK 0x00030000
#define GEN8_RASTER_DW1_CULL_MODE__SHIFT 16
+#define GEN8_RASTER_DW1_FORCE_MULTISAMPLE_ENABLE (0x1 << 14)
#define GEN8_RASTER_DW1_SMOOTH_POINT_ENABLE (0x1 << 13)
-#define GEN8_RASTER_DW1_API_MULTISAMPLE_ENABLE (0x1 << 12)
+#define GEN8_RASTER_DW1_DX_MULTISAMPLE_ENABLE (0x1 << 12)
+#define GEN8_RASTER_DW1_DX_MSRASTMODE__MASK 0x00000c00
+#define GEN8_RASTER_DW1_DX_MSRASTMODE__SHIFT 10
#define GEN8_RASTER_DW1_DEPTH_OFFSET_SOLID (0x1 << 9)
#define GEN8_RASTER_DW1_DEPTH_OFFSET_WIREFRAME (0x1 << 8)
#define GEN8_RASTER_DW1_DEPTH_OFFSET_POINT (0x1 << 7)
@@ -1223,10 +1351,10 @@ enum gen_msrast_mode {
#define GEN7_WM_DW1_STATISTICS (0x1 << 31)
-#define GEN7_WM_DW1_DEPTH_CLEAR (0x1 << 30)
+#define GEN7_WM_DW1_LEGACY_DEPTH_CLEAR (0x1 << 30)
#define GEN7_WM_DW1_PS_DISPATCH_ENABLE (0x1 << 29)
-#define GEN7_WM_DW1_DEPTH_RESOLVE (0x1 << 28)
-#define GEN7_WM_DW1_HIZ_RESOLVE (0x1 << 27)
+#define GEN7_WM_DW1_LEGACY_DEPTH_RESOLVE (0x1 << 28)
+#define GEN7_WM_DW1_LEGACY_HIZ_RESOLVE (0x1 << 27)
#define GEN7_WM_DW1_LEGACY_LINE_RAST (0x1 << 26)
#define GEN7_WM_DW1_PS_KILL_PIXEL (0x1 << 25)
#define GEN7_WM_DW1_PSCDEPTH__MASK 0x01800000
@@ -1235,6 +1363,11 @@ enum gen_msrast_mode {
#define GEN7_WM_DW1_EDSC__SHIFT 21
#define GEN7_WM_DW1_PS_USE_DEPTH (0x1 << 20)
#define GEN7_WM_DW1_PS_USE_W (0x1 << 19)
+#define GEN8_WM_DW1_FORCE_DISPATCH_ENABLE__MASK 0x00180000
+#define GEN8_WM_DW1_FORCE_DISPATCH_ENABLE__SHIFT 19
+#define GEN8_WM_DW1_FORCE_DISPATCH_ENABLE_NORMAL (0x0 << 19)
+#define GEN8_WM_DW1_FORCE_DISPATCH_ENABLE_OFF (0x1 << 19)
+#define GEN8_WM_DW1_FORCE_DISPATCH_ENABLE_ON (0x2 << 19)
#define GEN7_WM_DW1_ZW_INTERP__MASK 0x00060000
#define GEN7_WM_DW1_ZW_INTERP__SHIFT 17
#define GEN7_WM_DW1_BARYCENTRIC_INTERP__MASK 0x0001f800
@@ -1261,6 +1394,11 @@ enum gen_msrast_mode {
#define GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT (0x1 << 2)
#define GEN7_WM_DW1_MSRASTMODE__MASK 0x00000003
#define GEN7_WM_DW1_MSRASTMODE__SHIFT 0
+#define GEN8_WM_DW1_FORCE_KILL_PIXEL__MASK 0x00000003
+#define GEN8_WM_DW1_FORCE_KILL_PIXEL__SHIFT 0
+#define GEN8_WM_DW1_FORCE_KILL_PIXEL_NORMAL 0x0
+#define GEN8_WM_DW1_FORCE_KILL_PIXEL_OFF 0x1
+#define GEN8_WM_DW1_FORCE_KILL_PIXEL_ON 0x2
#define GEN7_WM_DW2_MSDISPMODE__MASK 0x80000000
#define GEN7_WM_DW2_MSDISPMODE__SHIFT 31
@@ -1271,6 +1409,7 @@ enum gen_msrast_mode {
#define GEN8_3DSTATE_WM_CHROMAKEY__SIZE 2
+#define GEN8_CHROMAKEY_DW1_KILL_ENABLE (0x1 << 31)
#define GEN8_3DSTATE_WM_DEPTH_STENCIL__SIZE 4
@@ -1318,6 +1457,7 @@ enum gen_msrast_mode {
#define GEN8_WM_HZ_DW1_STENCIL_CLEAR (0x1 << 31)
#define GEN8_WM_HZ_DW1_DEPTH_CLEAR (0x1 << 30)
+#define GEN8_WM_HZ_DW1_SCISSOR_ENABLE (0x1 << 29)
#define GEN8_WM_HZ_DW1_DEPTH_RESOLVE (0x1 << 28)
#define GEN8_WM_HZ_DW1_HIZ_RESOLVE (0x1 << 27)
#define GEN8_WM_HZ_DW1_PIXEL_OFFSET_ENABLE (0x1 << 26)
@@ -1443,17 +1583,17 @@ enum gen_msrast_mode {
#define GEN8_PS_BLEND_DW1_ALPHA_TO_COVERAGE (0x1 << 31)
#define GEN8_PS_BLEND_DW1_WRITABLE_RT (0x1 << 30)
-#define GEN8_PS_BLEND_DW1_BLEND_ENABLE (0x1 << 29)
-#define GEN8_PS_BLEND_DW1_SRC_ALPHA_FACTOR__MASK 0x1f000000
-#define GEN8_PS_BLEND_DW1_SRC_ALPHA_FACTOR__SHIFT 24
-#define GEN8_PS_BLEND_DW1_DST_ALPHA_FACTOR__MASK 0x00f80000
-#define GEN8_PS_BLEND_DW1_DST_ALPHA_FACTOR__SHIFT 19
-#define GEN8_PS_BLEND_DW1_SRC_COLOR_FACTOR__MASK 0x0007c000
-#define GEN8_PS_BLEND_DW1_SRC_COLOR_FACTOR__SHIFT 14
-#define GEN8_PS_BLEND_DW1_DST_COLOR_FACTOR__MASK 0x00003e00
-#define GEN8_PS_BLEND_DW1_DST_COLOR_FACTOR__SHIFT 9
+#define GEN8_PS_BLEND_DW1_RT0_BLEND_ENABLE (0x1 << 29)
+#define GEN8_PS_BLEND_DW1_RT0_SRC_ALPHA_FACTOR__MASK 0x1f000000
+#define GEN8_PS_BLEND_DW1_RT0_SRC_ALPHA_FACTOR__SHIFT 24
+#define GEN8_PS_BLEND_DW1_RT0_DST_ALPHA_FACTOR__MASK 0x00f80000
+#define GEN8_PS_BLEND_DW1_RT0_DST_ALPHA_FACTOR__SHIFT 19
+#define GEN8_PS_BLEND_DW1_RT0_SRC_COLOR_FACTOR__MASK 0x0007c000
+#define GEN8_PS_BLEND_DW1_RT0_SRC_COLOR_FACTOR__SHIFT 14
+#define GEN8_PS_BLEND_DW1_RT0_DST_COLOR_FACTOR__MASK 0x00003e00
+#define GEN8_PS_BLEND_DW1_RT0_DST_COLOR_FACTOR__SHIFT 9
#define GEN8_PS_BLEND_DW1_ALPHA_TEST_ENABLE (0x1 << 8)
-#define GEN8_PS_BLEND_DW1_INDEPENDENT_ALPHA_ENABLE (0x1 << 7)
+#define GEN8_PS_BLEND_DW1_RT0_INDEPENDENT_ALPHA_ENABLE (0x1 << 7)
#define GEN6_3DSTATE_CONSTANT_ANY__SIZE 11
@@ -1469,6 +1609,8 @@ enum gen_msrast_mode {
#define GEN6_CONSTANT_DW_ADDR_ADDR__SHR 5
+#define GEN8_CONSTANT_DW0_MOCS__MASK 0x00007f00
+#define GEN8_CONSTANT_DW0_MOCS__SHIFT 8
#define GEN7_CONSTANT_DW1_BUFFER1_READ_LEN__MASK 0xffff0000
#define GEN7_CONSTANT_DW1_BUFFER1_READ_LEN__SHIFT 16
@@ -1502,6 +1644,8 @@ enum gen_msrast_mode {
#define GEN6_3DSTATE_DRAWING_RECTANGLE__SIZE 4
+#define GEN8_DRAWING_RECTANGLE_DW0_CORE_MODE_SELECT__MASK 0x0000c000
+#define GEN8_DRAWING_RECTANGLE_DW0_CORE_MODE_SELECT__SHIFT 14
#define GEN6_DRAWING_RECTANGLE_DW1_MIN_Y__MASK 0xffff0000
#define GEN6_DRAWING_RECTANGLE_DW1_MIN_Y__SHIFT 16
@@ -1624,15 +1768,12 @@ enum gen_msrast_mode {
#define GEN8_DEPTH_DW5_MOCS__MASK 0x0000007f
#define GEN8_DEPTH_DW5_MOCS__SHIFT 0
-#define GEN8_DEPTH_DW6_OFFSET_Y__MASK 0xffff0000
-#define GEN8_DEPTH_DW6_OFFSET_Y__SHIFT 16
-#define GEN8_DEPTH_DW6_OFFSET_X__MASK 0x0000ffff
-#define GEN8_DEPTH_DW6_OFFSET_X__SHIFT 0
#define GEN8_DEPTH_DW7_RT_VIEW_EXTENT__MASK 0xffe00000
#define GEN8_DEPTH_DW7_RT_VIEW_EXTENT__SHIFT 21
#define GEN8_DEPTH_DW7_QPITCH__MASK 0x00007fff
#define GEN8_DEPTH_DW7_QPITCH__SHIFT 0
+#define GEN8_DEPTH_DW7_QPITCH__SHR 2
#define GEN6_3DSTATE_POLY_STIPPLE_OFFSET__SIZE 2
@@ -1649,6 +1790,11 @@ enum gen_msrast_mode {
#define GEN6_3DSTATE_LINE_STIPPLE__SIZE 3
+#define GEN6_LINE_STIPPLE_DW1_CURRENT_MODIFY_ENABLE (0x1 << 31)
+#define GEN6_LINE_STIPPLE_DW1_CURRENT_REPEAT_COUNTER__MASK 0x3fe00000
+#define GEN6_LINE_STIPPLE_DW1_CURRENT_REPEAT_COUNTER__SHIFT 21
+#define GEN6_LINE_STIPPLE_DW1_CURRENT_STIPPLE_INDEX__MASK 0x000f0000
+#define GEN6_LINE_STIPPLE_DW1_CURRENT_STIPPLE_INDEX__SHIFT 16
#define GEN6_LINE_STIPPLE_DW1_PATTERN__MASK 0x0000ffff
#define GEN6_LINE_STIPPLE_DW1_PATTERN__SHIFT 0
@@ -1664,16 +1810,28 @@ enum gen_msrast_mode {
#define GEN6_3DSTATE_AA_LINE_PARAMETERS__SIZE 3
+#define GEN8_AA_LINE_DW1_POINT_BIAS__MASK 0xff000000
+#define GEN8_AA_LINE_DW1_POINT_BIAS__SHIFT 24
+#define GEN8_AA_LINE_DW1_POINT_BIAS__RADIX 8
#define GEN6_AA_LINE_DW1_BIAS__MASK 0x00ff0000
#define GEN6_AA_LINE_DW1_BIAS__SHIFT 16
#define GEN6_AA_LINE_DW1_BIAS__RADIX 8
+#define GEN8_AA_LINE_DW1_POINT_SLOPE__MASK 0x0000ff00
+#define GEN8_AA_LINE_DW1_POINT_SLOPE__SHIFT 8
+#define GEN8_AA_LINE_DW1_POINT_SLOPE__RADIX 8
#define GEN6_AA_LINE_DW1_SLOPE__MASK 0x000000ff
#define GEN6_AA_LINE_DW1_SLOPE__SHIFT 0
#define GEN6_AA_LINE_DW1_SLOPE__RADIX 8
+#define GEN8_AA_LINE_DW2_POINT_CAP_BIAS__MASK 0xff000000
+#define GEN8_AA_LINE_DW2_POINT_CAP_BIAS__SHIFT 24
+#define GEN8_AA_LINE_DW2_POINT_CAP_BIAS__RADIX 8
#define GEN6_AA_LINE_DW2_CAP_BIAS__MASK 0x00ff0000
#define GEN6_AA_LINE_DW2_CAP_BIAS__SHIFT 16
#define GEN6_AA_LINE_DW2_CAP_BIAS__RADIX 8
+#define GEN8_AA_LINE_DW2_POINT_CAP_SLOPE__MASK 0x0000ff00
+#define GEN8_AA_LINE_DW2_POINT_CAP_SLOPE__SHIFT 8
+#define GEN8_AA_LINE_DW2_POINT_CAP_SLOPE__RADIX 8
#define GEN6_AA_LINE_DW2_CAP_SLOPE__MASK 0x000000ff
#define GEN6_AA_LINE_DW2_CAP_SLOPE__SHIFT 0
#define GEN6_AA_LINE_DW2_CAP_SLOPE__RADIX 8
@@ -1690,7 +1848,7 @@ enum gen_msrast_mode {
#define GEN6_3DSTATE_MULTISAMPLE__SIZE 4
-#define GEN75_MULTISAMPLE_DW1_DX9_MULTISAMPLE_ENABLE (0x1 << 5)
+#define GEN75_MULTISAMPLE_DW1_PIXEL_OFFSET_ENABLE (0x1 << 5)
#define GEN6_MULTISAMPLE_DW1_PIXEL_LOCATION__MASK 0x00000010
#define GEN6_MULTISAMPLE_DW1_PIXEL_LOCATION__SHIFT 4
#define GEN6_MULTISAMPLE_DW1_NUM_SAMPLES__MASK 0x0000000e
@@ -1724,6 +1882,7 @@ enum gen_msrast_mode {
#define GEN8_STENCIL_DW4_QPITCH__MASK 0x00007fff
#define GEN8_STENCIL_DW4_QPITCH__SHIFT 0
+#define GEN8_STENCIL_DW4_QPITCH__SHR 2
#define GEN6_3DSTATE_HIER_DEPTH_BUFFER__SIZE 5
@@ -1739,6 +1898,7 @@ enum gen_msrast_mode {
#define GEN8_HIZ_DW4_QPITCH__MASK 0x00007fff
#define GEN8_HIZ_DW4_QPITCH__SHIFT 0
+#define GEN8_HIZ_DW4_QPITCH__SHR 2
#define GEN6_3DSTATE_CLEAR_PARAMS__SIZE 3
diff --git a/src/gallium/drivers/ilo/genhw/gen_render_dynamic.xml.h b/src/gallium/drivers/ilo/genhw/gen_render_dynamic.xml.h
index b65b704adc6..b2c2142af78 100644
--- a/src/gallium/drivers/ilo/genhw/gen_render_dynamic.xml.h
+++ b/src/gallium/drivers/ilo/genhw/gen_render_dynamic.xml.h
@@ -430,8 +430,10 @@ enum gen_key_filter {
#define GEN7_SAMPLER_DW0_BORDER_COLOR_MODE_DX9 (0x1 << 29)
#define GEN6_SAMPLER_DW0_LOD_PRECLAMP_ENABLE (0x1 << 28)
#define GEN6_SAMPLER_DW0_MIN_MAG_NOT_EQUAL (0x1 << 27)
-#define GEN8_SAMPLER_DW0_LOD_PRECLAMP_ENABLE__MASK 0x18000000
-#define GEN8_SAMPLER_DW0_LOD_PRECLAMP_ENABLE__SHIFT 27
+#define GEN8_SAMPLER_DW0_LOD_PRECLAMP_MODE__MASK 0x18000000
+#define GEN8_SAMPLER_DW0_LOD_PRECLAMP_MODE__SHIFT 27
+#define GEN8_SAMPLER_DW0_LOD_PRECLAMP_MODE_NONE (0x0 << 27)
+#define GEN8_SAMPLER_DW0_LOD_PRECLAMP_MODE_OGL (0x2 << 27)
#define GEN6_SAMPLER_DW0_BASE_LOD__MASK 0x07c00000
#define GEN6_SAMPLER_DW0_BASE_LOD__SHIFT 22
#define GEN6_SAMPLER_DW0_BASE_LOD__RADIX 1
@@ -493,23 +495,11 @@ enum gen_key_filter {
#define GEN6_SAMPLER_DW2_BORDER_COLOR_ADDR__SHIFT 5
#define GEN6_SAMPLER_DW2_BORDER_COLOR_ADDR__SHR 5
-#define GEN8_SAMPLER_DW2_SEP_FILTER_COEFF_TABLE_SIZE__MASK 0xc0000000
-#define GEN8_SAMPLER_DW2_SEP_FILTER_COEFF_TABLE_SIZE__SHIFT 30
-#define GEN8_SAMPLER_DW2_SEP_FILTER_WIDTH__MASK 0x30000000
-#define GEN8_SAMPLER_DW2_SEP_FILTER_WIDTH__SHIFT 28
-#define GEN8_SAMPLER_DW2_SEP_FILTER_HEIGHT__MASK 0x0c000000
-#define GEN8_SAMPLER_DW2_SEP_FILTER_HEIGHT__SHIFT 26
#define GEN8_SAMPLER_DW2_INDIRECT_STATE_ADDR__MASK 0x00ffffc0
#define GEN8_SAMPLER_DW2_INDIRECT_STATE_ADDR__SHIFT 6
#define GEN8_SAMPLER_DW2_INDIRECT_STATE_ADDR__SHR 6
-#define GEN8_SAMPLER_DW2_FLEXIBLE_FILTER_MODE (0x1 << 4)
-#define GEN8_SAMPLER_DW2_FLEXIBLE_FILTER_COEFF_SIZE (0x1 << 3)
-#define GEN8_SAMPLER_DW2_FLEXIBLE_FILTER_HALIGN (0x1 << 2)
-#define GEN8_SAMPLER_DW2_FLEXIBLE_FILTER_VALIGN (0x1 << 1)
#define GEN8_SAMPLER_DW2_LOD_CLAMP_MAG_MODE (0x1 << 0)
-#define GEN8_SAMPLER_DW3_NON_SEP_FILTER_FOOTPRINT_MASK__MASK 0xff000000
-#define GEN8_SAMPLER_DW3_NON_SEP_FILTER_FOOTPRINT_MASK__SHIFT 24
#define GEN6_SAMPLER_DW3_CHROMAKEY_ENABLE (0x1 << 25)
#define GEN6_SAMPLER_DW3_CHROMAKEY_INDEX__MASK 0x01800000
#define GEN6_SAMPLER_DW3_CHROMAKEY_INDEX__SHIFT 23
diff --git a/src/gallium/drivers/ilo/genhw/gen_render_media.xml.h b/src/gallium/drivers/ilo/genhw/gen_render_media.xml.h
index 55d830bad32..2476002ec91 100644
--- a/src/gallium/drivers/ilo/genhw/gen_render_media.xml.h
+++ b/src/gallium/drivers/ilo/genhw/gen_render_media.xml.h
@@ -111,6 +111,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define GEN8_IDRT_DW5_CURBE_READ_LEN__MASK 0xffff0000
#define GEN8_IDRT_DW5_CURBE_READ_LEN__SHIFT 16
+#define GEN8_IDRT_DW5_CURBE_READ_OFFSET__MASK 0x0000ffff
+#define GEN8_IDRT_DW5_CURBE_READ_OFFSET__SHIFT 0
#define GEN8_IDRT_DW6_ROUNDING_MODE__MASK 0x00c00000
#define GEN8_IDRT_DW6_ROUNDING_MODE__SHIFT 22
@@ -121,7 +123,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define GEN8_IDRT_DW6_BARRIER_ENABLE (0x1 << 21)
#define GEN8_IDRT_DW6_SLM_SIZE__MASK 0x001f0000
#define GEN8_IDRT_DW6_SLM_SIZE__SHIFT 16
-#define GEN8_IDRT_DW6_THREAD_GROUP_SIZE__MASK 0x000000ff
+#define GEN8_IDRT_DW6_THREAD_GROUP_SIZE__MASK 0x000003ff
#define GEN8_IDRT_DW6_THREAD_GROUP_SIZE__SHIFT 0
#define GEN8_IDRT_DW7_CROSS_THREAD_CURBE_READ_LEN__MASK 0x000000ff
@@ -280,6 +282,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define GEN8_GPGPU_DW1_IDRT_OFFSET__MASK 0x0000003f
#define GEN8_GPGPU_DW1_IDRT_OFFSET__SHIFT 0
+#define GEN8_GPGPU_DW2_INDIRECT_LEN__MASK 0x0001ffff
+#define GEN8_GPGPU_DW2_INDIRECT_LEN__SHIFT 0
#define GEN8_GPGPU_DW3_INDIRECT_ADDR__MASK 0xffffffe0
#define GEN8_GPGPU_DW3_INDIRECT_ADDR__SHIFT 5
diff --git a/src/gallium/drivers/ilo/genhw/gen_render_surface.xml.h b/src/gallium/drivers/ilo/genhw/gen_render_surface.xml.h
index b5d09f64429..c180450ce27 100644
--- a/src/gallium/drivers/ilo/genhw/gen_render_surface.xml.h
+++ b/src/gallium/drivers/ilo/genhw/gen_render_surface.xml.h
@@ -388,7 +388,7 @@ enum gen_surface_scs {
#define GEN8_SURFACE_DW0_TILING__SHIFT 12
#define GEN8_SURFACE_DW0_VSTRIDE (0x1 << 11)
#define GEN8_SURFACE_DW0_VSTRIDE_OFFSET (0x1 << 10)
-#define GEN8_SURFACE_DW0_SAMPLER_L2_BYPASS_MODE (0x1 << 9)
+#define GEN8_SURFACE_DW0_SAMPLER_L2_BYPASS_DISABLE (0x1 << 9)
#define GEN7_SURFACE_DW0_RENDER_CACHE_RW (0x1 << 8)
#define GEN7_SURFACE_DW0_MEDIA_BOUNDARY_PIXEL_MODE__MASK 0x000000c0
#define GEN7_SURFACE_DW0_MEDIA_BOUNDARY_PIXEL_MODE__SHIFT 6
@@ -402,6 +402,7 @@ enum gen_surface_scs {
#define GEN8_SURFACE_DW1_BASE_LOD__SHIFT 19
#define GEN8_SURFACE_DW1_QPITCH__MASK 0x00007fff
#define GEN8_SURFACE_DW1_QPITCH__SHIFT 0
+#define GEN8_SURFACE_DW1_QPITCH__SHR 2
#define GEN7_SURFACE_DW2_HEIGHT__MASK 0x3fff0000
#define GEN7_SURFACE_DW2_HEIGHT__SHIFT 16
@@ -434,7 +435,6 @@ enum gen_surface_scs {
#define GEN8_SURFACE_DW4_MULTISAMPLECOUNT_2 (0x1 << 3)
#define GEN7_SURFACE_DW4_MULTISAMPLECOUNT_4 (0x2 << 3)
#define GEN7_SURFACE_DW4_MULTISAMPLECOUNT_8 (0x3 << 3)
-#define GEN8_SURFACE_DW4_MULTISAMPLECOUNT_16 (0x4 << 3)
#define GEN7_SURFACE_DW4_MSPOS_INDEX__MASK 0x00000007
#define GEN7_SURFACE_DW4_MSPOS_INDEX__SHIFT 0
#define GEN7_SURFACE_DW4_MIN_ARRAY_ELEMENT_STRBUF__MASK 0x07ffffff
@@ -451,8 +451,11 @@ enum gen_surface_scs {
#define GEN8_SURFACE_DW5_Y_OFFSET__MASK 0x00e00000
#define GEN8_SURFACE_DW5_Y_OFFSET__SHIFT 21
#define GEN8_SURFACE_DW5_Y_OFFSET__SHR 1
-#define GEN8_SURFACE_DW5_CUBE_EWA (0x1 << 20)
-#define GEN8_SURFACE_DW5_COHERENCY_TYPE (0x1 << 14)
+#define GEN8_SURFACE_DW5_CUBE_EWA_DISABLE (0x1 << 20)
+#define GEN8_SURFACE_DW5_COHERENCY_TYPE__MASK 0x00004000
+#define GEN8_SURFACE_DW5_COHERENCY_TYPE__SHIFT 14
+#define GEN8_SURFACE_DW5_COHERENCY_TYPE_GPU (0x0 << 14)
+#define GEN8_SURFACE_DW5_COHERENCY_TYPE_IA (0x1 << 14)
#define GEN7_SURFACE_DW5_MIN_LOD__MASK 0x000000f0
#define GEN7_SURFACE_DW5_MIN_LOD__SHIFT 4
#define GEN7_SURFACE_DW5_MIP_COUNT_LOD__MASK 0x0000000f
@@ -463,22 +466,23 @@ enum gen_surface_scs {
#define GEN7_SURFACE_DW6_UV_X_OFFSET__SHIFT 16
#define GEN7_SURFACE_DW6_UV_Y_OFFSET__MASK 0x00003fff
#define GEN7_SURFACE_DW6_UV_Y_OFFSET__SHIFT 0
+#define GEN7_SURFACE_DW6_APPEND_COUNTER_ADDR__MASK 0xffffffc0
+#define GEN7_SURFACE_DW6_APPEND_COUNTER_ADDR__SHIFT 6
+#define GEN7_SURFACE_DW6_APPEND_COUNTER_ADDR__SHR 6
#define GEN7_SURFACE_DW6_MCS_ADDR__MASK 0xfffff000
#define GEN7_SURFACE_DW6_MCS_ADDR__SHIFT 12
#define GEN7_SURFACE_DW6_MCS_ADDR__SHR 12
#define GEN8_SURFACE_DW6_AUX_QPITCH__MASK 0x7fff0000
#define GEN8_SURFACE_DW6_AUX_QPITCH__SHIFT 16
+#define GEN8_SURFACE_DW6_AUX_QPITCH__SHR 2
#define GEN7_SURFACE_DW6_AUX_PITCH__MASK 0x00000ff8
#define GEN7_SURFACE_DW6_AUX_PITCH__SHIFT 3
-#define GEN7_SURFACE_DW6_APPEND_COUNTER_ADDR__MASK 0xffffffc0
-#define GEN7_SURFACE_DW6_APPEND_COUNTER_ADDR__SHIFT 6
-#define GEN7_SURFACE_DW6_APPEND_COUNTER_ADDR__SHR 6
-#define GEN7_SURFACE_DW6_AUX_MODE__MASK 0x00000007
-#define GEN7_SURFACE_DW6_AUX_MODE__SHIFT 0
-#define GEN7_SURFACE_DW6_AUX_MODE_NONE 0x0
-#define GEN7_SURFACE_DW6_AUX_MODE_MCS 0x1
-#define GEN7_SURFACE_DW6_AUX_MODE_APPEND 0x2
-#define GEN8_SURFACE_DW6_AUX_MODE_HIZ 0x3
+#define GEN7_SURFACE_DW6_AUX__MASK 0x00000007
+#define GEN7_SURFACE_DW6_AUX__SHIFT 0
+#define GEN7_SURFACE_DW6_AUX_NONE 0x0
+#define GEN7_SURFACE_DW6_AUX_MCS 0x1
+#define GEN7_SURFACE_DW6_AUX_APPEND 0x2
+#define GEN8_SURFACE_DW6_AUX_HIZ 0x3
#define GEN7_SURFACE_DW7_CC_R__MASK 0x80000000
#define GEN7_SURFACE_DW7_CC_R__SHIFT 31
@@ -504,6 +508,12 @@ enum gen_surface_scs {
+#define GEN8_SURFACE_DW11_V_X_OFFSET__MASK 0x3fff0000
+#define GEN8_SURFACE_DW11_V_X_OFFSET__SHIFT 16
+#define GEN8_SURFACE_DW11_V_Y_OFFSET__MASK 0x00003fff
+#define GEN8_SURFACE_DW11_V_Y_OFFSET__SHIFT 0
+#define GEN8_SURFACE_DW11_AUX_ADDR_HI__MASK 0xffffffff
+#define GEN8_SURFACE_DW11_AUX_ADDR_HI__SHIFT 0
diff --git a/src/gallium/drivers/ilo/ilo_common.h b/src/gallium/drivers/ilo/ilo_common.h
index 3dbe79fb872..d3016590551 100644
--- a/src/gallium/drivers/ilo/ilo_common.h
+++ b/src/gallium/drivers/ilo/ilo_common.h
@@ -34,6 +34,7 @@
#include "util/list.h"
#include "util/u_format.h"
#include "util/u_inlines.h"
+#include "util/u_memory.h"
#include "util/u_pointer.h"
#include "core/ilo_core.h"
diff --git a/src/gallium/drivers/ilo/ilo_shader.c b/src/gallium/drivers/ilo/ilo_shader.c
index 5f2b01017e2..73b625e9de4 100644
--- a/src/gallium/drivers/ilo/ilo_shader.c
+++ b/src/gallium/drivers/ilo/ilo_shader.c
@@ -987,15 +987,6 @@ ilo_shader_destroy(struct ilo_shader_state *shader)
}
/**
- * Return the type (PIPE_SHADER_x) of the shader.
- */
-int
-ilo_shader_get_type(const struct ilo_shader_state *shader)
-{
- return shader->info.type;
-}
-
-/**
* Select a kernel for the given context. This will compile a new kernel if
* none of the existing kernels work with the context.
*
@@ -1257,9 +1248,6 @@ ilo_shader_get_kernel_param(const struct ilo_shader_state *shader,
case ILO_KERNEL_SAMPLER_COUNT:
val = shader->info.num_samplers;
break;
- case ILO_KERNEL_URB_DATA_START_REG:
- val = kernel->in.start_grf;
- break;
case ILO_KERNEL_SKIP_CBUF0_UPLOAD:
val = kernel->skip_cbuf0_upload;
break;
@@ -1311,9 +1299,6 @@ ilo_shader_get_kernel_param(const struct ilo_shader_state *shader,
case ILO_KERNEL_VS_GEN6_SO:
val = kernel->stream_output;
break;
- case ILO_KERNEL_VS_GEN6_SO_START_REG:
- val = kernel->gs_start_grf;
- break;
case ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET:
val = kernel->gs_offsets[0];
break;
@@ -1340,16 +1325,6 @@ ilo_shader_get_kernel_param(const struct ilo_shader_state *shader,
val = kernel->bt.gen6_so_count;
break;
- case ILO_KERNEL_FS_INPUT_Z:
- case ILO_KERNEL_FS_INPUT_W:
- val = kernel->in.has_pos;
- break;
- case ILO_KERNEL_FS_OUTPUT_Z:
- val = kernel->out.has_pos;
- break;
- case ILO_KERNEL_FS_USE_KILL:
- val = kernel->has_kill;
- break;
case ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS:
val = kernel->in.barycentric_interpolation_mode;
break;
diff --git a/src/gallium/drivers/ilo/ilo_shader.h b/src/gallium/drivers/ilo/ilo_shader.h
index d9f02a4746a..01de54146b1 100644
--- a/src/gallium/drivers/ilo/ilo_shader.h
+++ b/src/gallium/drivers/ilo/ilo_shader.h
@@ -36,7 +36,6 @@ enum ilo_kernel_param {
ILO_KERNEL_INPUT_COUNT,
ILO_KERNEL_OUTPUT_COUNT,
ILO_KERNEL_SAMPLER_COUNT,
- ILO_KERNEL_URB_DATA_START_REG,
ILO_KERNEL_SKIP_CBUF0_UPLOAD,
ILO_KERNEL_PCB_CBUF0_SIZE,
@@ -53,7 +52,6 @@ enum ilo_kernel_param {
ILO_KERNEL_VS_INPUT_EDGEFLAG,
ILO_KERNEL_VS_PCB_UCP_SIZE,
ILO_KERNEL_VS_GEN6_SO,
- ILO_KERNEL_VS_GEN6_SO_START_REG,
ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET,
ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET,
ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET,
@@ -64,10 +62,6 @@ enum ilo_kernel_param {
ILO_KERNEL_GS_GEN6_SURFACE_SO_BASE,
ILO_KERNEL_GS_GEN6_SURFACE_SO_COUNT,
- ILO_KERNEL_FS_INPUT_Z,
- ILO_KERNEL_FS_INPUT_W,
- ILO_KERNEL_FS_OUTPUT_Z,
- ILO_KERNEL_FS_USE_KILL,
ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS,
ILO_KERNEL_FS_DISPATCH_16_OFFSET,
ILO_KERNEL_FS_SURFACE_RT_BASE,
@@ -149,9 +143,6 @@ ilo_shader_create_cs(const struct ilo_dev *dev,
void
ilo_shader_destroy(struct ilo_shader_state *shader);
-int
-ilo_shader_get_type(const struct ilo_shader_state *shader);
-
bool
ilo_shader_select_kernel(struct ilo_shader_state *shader,
const struct ilo_state_vector *vec,
diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources
index 9346ea3204d..c18e9f5b435 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -151,6 +151,15 @@ NVC0_C_SOURCES := \
nvc0/nvc0_program.c \
nvc0/nvc0_program.h \
nvc0/nvc0_query.c \
+ nvc0/nvc0_query.h \
+ nvc0/nvc0_query_hw.c \
+ nvc0/nvc0_query_hw.h \
+ nvc0/nvc0_query_hw_metric.c \
+ nvc0/nvc0_query_hw_metric.h \
+ nvc0/nvc0_query_hw_sm.c \
+ nvc0/nvc0_query_hw_sm.h \
+ nvc0/nvc0_query_sw.c \
+ nvc0/nvc0_query_sw.h \
nvc0/nvc0_resource.c \
nvc0/nvc0_resource.h \
nvc0/nvc0_screen.c \
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index 400b9f09e51..7859c8e79bd 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -25,10 +25,24 @@
#include <stack>
#include <limits>
+#if __cplusplus >= 201103L
+#include <unordered_map>
+#else
#include <tr1/unordered_map>
+#endif
namespace nv50_ir {
+#if __cplusplus >= 201103L
+using std::hash;
+using std::unordered_map;
+#elif !defined(ANDROID)
+using std::tr1::hash;
+using std::tr1::unordered_map;
+#else
+#error Android release before Lollipop is not supported!
+#endif
+
#define MAX_REGISTER_FILE_SIZE 256
class RegisterSet
@@ -349,12 +363,12 @@ RegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock *b, BasicBlock *p)
struct PhiMapHash {
size_t operator()(const std::pair<Instruction *, BasicBlock *>& val) const {
- return std::tr1::hash<Instruction*>()(val.first) * 31 +
- std::tr1::hash<BasicBlock*>()(val.second);
+ return hash<Instruction*>()(val.first) * 31 +
+ hash<BasicBlock*>()(val.second);
}
};
-typedef std::tr1::unordered_map<
+typedef unordered_map<
std::pair<Instruction *, BasicBlock *>, Value *, PhiMapHash> PhiMap;
// Critical edges need to be split up so that work can be inserted along
diff --git a/src/gallium/drivers/nouveau/nouveau_fence.c b/src/gallium/drivers/nouveau/nouveau_fence.c
index ee4e08dd520..21cf2b9ae5e 100644
--- a/src/gallium/drivers/nouveau/nouveau_fence.c
+++ b/src/gallium/drivers/nouveau/nouveau_fence.c
@@ -190,8 +190,14 @@ nouveau_fence_wait(struct nouveau_fence *fence)
/* wtf, someone is waiting on a fence in flush_notify handler? */
assert(fence->state != NOUVEAU_FENCE_STATE_EMITTING);
- if (fence->state < NOUVEAU_FENCE_STATE_EMITTED)
- nouveau_fence_emit(fence);
+ if (fence->state < NOUVEAU_FENCE_STATE_EMITTED) {
+ PUSH_SPACE(screen->pushbuf, 8);
+ /* The space allocation might trigger a flush, which could emit the
+ * current fence. So check again.
+ */
+ if (fence->state < NOUVEAU_FENCE_STATE_EMITTED)
+ nouveau_fence_emit(fence);
+ }
if (fence->state < NOUVEAU_FENCE_STATE_FLUSHED)
if (nouveau_pushbuf_kick(screen->pushbuf, screen->pushbuf->channel))
@@ -224,8 +230,12 @@ nouveau_fence_wait(struct nouveau_fence *fence)
void
nouveau_fence_next(struct nouveau_screen *screen)
{
- if (screen->fence.current->state < NOUVEAU_FENCE_STATE_EMITTING)
- nouveau_fence_emit(screen->fence.current);
+ if (screen->fence.current->state < NOUVEAU_FENCE_STATE_EMITTING) {
+ if (screen->fence.current->ref > 1)
+ nouveau_fence_emit(screen->fence.current);
+ else
+ return;
+ }
nouveau_fence_ref(NULL, &screen->fence.current);
diff --git a/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c b/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c
index 5757eb1fb16..dbbb8baad79 100644
--- a/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c
+++ b/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c
@@ -1,3 +1,4 @@
+#include <strings.h>
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
index fdde11f4cd5..941555ffbf8 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
@@ -65,14 +65,9 @@ nv50_constbufs_validate(struct nv50_context *nv50)
PUSH_DATA (push, (b << 12) | (i << 8) | p | 1);
}
while (words) {
- unsigned nr;
-
- if (!PUSH_SPACE(push, 16))
- break;
- nr = PUSH_AVAIL(push);
- assert(nr >= 16);
- nr = MIN2(MIN2(nr - 3, words), NV04_PFIFO_MAX_PACKET_LEN);
+ unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN);
+ PUSH_SPACE(push, nr + 3);
BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
PUSH_DATA (push, (start << 8) | b);
BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nr);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
index be514077d32..9a3fd1e705f 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
@@ -187,14 +187,7 @@ nv50_sifc_linear_u8(struct nouveau_context *nv,
PUSH_DATA (push, 0);
while (count) {
- unsigned nr;
-
- if (!PUSH_SPACE(push, 16))
- break;
- nr = PUSH_AVAIL(push);
- assert(nr >= 16);
- nr = MIN2(count, nr - 1);
- nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN);
+ unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);
BEGIN_NI04(push, NV50_2D(SIFC_DATA), nr);
PUSH_DATAp(push, src, nr);
@@ -395,12 +388,9 @@ nv50_cb_push(struct nouveau_context *nv,
nouveau_pushbuf_validate(push);
while (words) {
- unsigned nr;
-
- nr = PUSH_AVAIL(push);
- nr = MIN2(nr - 7, words);
- nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN - 1);
+ unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN);
+ PUSH_SPACE(push, nr + 7);
BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
PUSH_DATAh(push, bo->offset + base);
PUSH_DATA (push, bo->offset + base);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
index 47bd123621b..e33af042620 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
@@ -37,12 +37,9 @@ nvc0_screen_compute_setup(struct nvc0_screen *screen,
switch (dev->chipset & ~0xf) {
case 0xc0:
- if (dev->chipset == 0xc8)
- obj_class = NVC8_COMPUTE_CLASS;
- else
- obj_class = NVC0_COMPUTE_CLASS;
- break;
case 0xd0:
+ /* In theory, GF110+ should also support NVC8_COMPUTE_CLASS but,
+ * in practice, a ILLEGAL_CLASS dmesg fail appears when using it. */
obj_class = NVC0_COMPUTE_CLASS;
break;
default:
@@ -108,14 +105,6 @@ nvc0_screen_compute_setup(struct nvc0_screen *screen,
PUSH_DATAh(push, screen->text->offset);
PUSH_DATA (push, screen->text->offset);
- /* bind parameters buffer */
- BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
- PUSH_DATA (push, screen->parm->size);
- PUSH_DATAh(push, screen->parm->offset);
- PUSH_DATA (push, screen->parm->offset);
- BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
- PUSH_DATA (push, (0 << 8) | 1);
-
/* TODO: textures & samplers */
return 0;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 30bee3a0f8c..4af83c53224 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -15,6 +15,7 @@
#include "nvc0/nvc0_screen.h"
#include "nvc0/nvc0_program.h"
#include "nvc0/nvc0_resource.h"
+#include "nvc0/nvc0_query.h"
#include "nv50/nv50_transfer.h"
@@ -231,17 +232,6 @@ uint32_t nvc0_program_symbol_offset(const struct nvc0_program *,
uint32_t label);
void nvc0_program_init_tcp_empty(struct nvc0_context *);
-/* nvc0_query.c */
-void nvc0_init_query_functions(struct nvc0_context *);
-void nvc0_query_pushbuf_submit(struct nouveau_pushbuf *,
- struct pipe_query *, unsigned result_offset);
-void nvc0_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *);
-void nvc0_so_target_save_offset(struct pipe_context *,
- struct pipe_stream_output_target *, unsigned i,
- bool *serialize);
-
-#define NVC0_QUERY_TFB_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0)
-
/* nvc0_shader_state.c */
void nvc0_vertprog_validate(struct nvc0_context *);
void nvc0_tctlprog_validate(struct nvc0_context *);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index b13df6a9485..e4752e2dbc5 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -25,519 +25,51 @@
#define NVC0_PUSH_EXPLICIT_SPACE_CHECKING
#include "nvc0/nvc0_context.h"
-#include "nv_object.xml.h"
-#include "nvc0/nve4_compute.xml.h"
-#include "nvc0/nvc0_compute.xml.h"
-
-#define NVC0_QUERY_STATE_READY 0
-#define NVC0_QUERY_STATE_ACTIVE 1
-#define NVC0_QUERY_STATE_ENDED 2
-#define NVC0_QUERY_STATE_FLUSHED 3
-
-struct nvc0_query {
- uint32_t *data;
- uint16_t type;
- uint16_t index;
- int8_t ctr[4];
- uint32_t sequence;
- struct nouveau_bo *bo;
- uint32_t base;
- uint32_t offset; /* base + i * rotate */
- uint8_t state;
- bool is64bit;
- uint8_t rotate;
- int nesting; /* only used for occlusion queries */
- union {
- struct nouveau_mm_allocation *mm;
- uint64_t value;
- } u;
- struct nouveau_fence *fence;
-};
-
-#define NVC0_QUERY_ALLOC_SPACE 256
-
-static boolean nvc0_hw_sm_query_begin(struct nvc0_context *,
- struct nvc0_query *);
-static void nvc0_hw_sm_query_end(struct nvc0_context *, struct nvc0_query *);
-static boolean nvc0_hw_sm_query_result(struct nvc0_context *,
- struct nvc0_query *, void *, boolean);
-
-static inline struct nvc0_query *
-nvc0_query(struct pipe_query *pipe)
-{
- return (struct nvc0_query *)pipe;
-}
-
-static bool
-nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size)
-{
- struct nvc0_screen *screen = nvc0->screen;
- int ret;
-
- if (q->bo) {
- nouveau_bo_ref(NULL, &q->bo);
- if (q->u.mm) {
- if (q->state == NVC0_QUERY_STATE_READY)
- nouveau_mm_free(q->u.mm);
- else
- nouveau_fence_work(screen->base.fence.current,
- nouveau_mm_free_work, q->u.mm);
- }
- }
- if (size) {
- q->u.mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base);
- if (!q->bo)
- return false;
- q->offset = q->base;
-
- ret = nouveau_bo_map(q->bo, 0, screen->base.client);
- if (ret) {
- nvc0_query_allocate(nvc0, q, 0);
- return false;
- }
- q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base);
- }
- return true;
-}
-
-static void
-nvc0_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
-{
- nvc0_query_allocate(nvc0_context(pipe), nvc0_query(pq), 0);
- nouveau_fence_ref(NULL, &nvc0_query(pq)->fence);
- FREE(nvc0_query(pq));
-}
+#include "nvc0/nvc0_query.h"
+#include "nvc0/nvc0_query_sw.h"
+#include "nvc0/nvc0_query_hw.h"
+#include "nvc0/nvc0_query_hw_sm.h"
static struct pipe_query *
-nvc0_query_create(struct pipe_context *pipe, unsigned type, unsigned index)
+nvc0_create_query(struct pipe_context *pipe, unsigned type, unsigned index)
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
struct nvc0_query *q;
- unsigned space = NVC0_QUERY_ALLOC_SPACE;
- q = CALLOC_STRUCT(nvc0_query);
+ q = nvc0_sw_create_query(nvc0, type, index);
if (!q)
- return NULL;
-
- switch (type) {
- case PIPE_QUERY_OCCLUSION_COUNTER:
- case PIPE_QUERY_OCCLUSION_PREDICATE:
- q->rotate = 32;
- space = NVC0_QUERY_ALLOC_SPACE;
- break;
- case PIPE_QUERY_PIPELINE_STATISTICS:
- q->is64bit = true;
- space = 512;
- break;
- case PIPE_QUERY_SO_STATISTICS:
- case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
- q->is64bit = true;
- space = 64;
- break;
- case PIPE_QUERY_PRIMITIVES_GENERATED:
- case PIPE_QUERY_PRIMITIVES_EMITTED:
- q->is64bit = true;
- q->index = index;
- space = 32;
- break;
- case PIPE_QUERY_TIME_ELAPSED:
- case PIPE_QUERY_TIMESTAMP:
- case PIPE_QUERY_TIMESTAMP_DISJOINT:
- case PIPE_QUERY_GPU_FINISHED:
- space = 32;
- break;
- case NVC0_QUERY_TFB_BUFFER_OFFSET:
- space = 16;
- break;
- default:
-#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
- if (type >= NVC0_QUERY_DRV_STAT(0) && type <= NVC0_QUERY_DRV_STAT_LAST) {
- space = 0;
- q->is64bit = true;
- q->index = type - NVC0_QUERY_DRV_STAT(0);
- break;
- } else
-#endif
- if (nvc0->screen->base.device->drm_version >= 0x01000101) {
- if (type >= NVE4_HW_SM_QUERY(0) && type <= NVE4_HW_SM_QUERY_LAST) {
- /* for each MP:
- * [00] = WS0.C0
- * [04] = WS0.C1
- * [08] = WS0.C2
- * [0c] = WS0.C3
- * [10] = WS1.C0
- * [14] = WS1.C1
- * [18] = WS1.C2
- * [1c] = WS1.C3
- * [20] = WS2.C0
- * [24] = WS2.C1
- * [28] = WS2.C2
- * [2c] = WS2.C3
- * [30] = WS3.C0
- * [34] = WS3.C1
- * [38] = WS3.C2
- * [3c] = WS3.C3
- * [40] = MP.C4
- * [44] = MP.C5
- * [48] = MP.C6
- * [4c] = MP.C7
- * [50] = WS0.sequence
- * [54] = WS1.sequence
- * [58] = WS2.sequence
- * [5c] = WS3.sequence
- */
- space = (4 * 4 + 4 + 4) * nvc0->screen->mp_count * sizeof(uint32_t);
- break;
- } else
- if (type >= NVC0_HW_SM_QUERY(0) && type <= NVC0_HW_SM_QUERY_LAST) {
- /* for each MP:
- * [00] = MP.C0
- * [04] = MP.C1
- * [08] = MP.C2
- * [0c] = MP.C3
- * [10] = MP.C4
- * [14] = MP.C5
- * [18] = MP.C6
- * [1c] = MP.C7
- * [20] = MP.sequence
- */
- space = (8 + 1) * nvc0->screen->mp_count * sizeof(uint32_t);
- break;
- }
- }
- debug_printf("invalid query type: %u\n", type);
- FREE(q);
- return NULL;
- }
- if (!nvc0_query_allocate(nvc0, q, space)) {
- FREE(q);
- return NULL;
- }
-
- q->type = type;
-
- if (q->rotate) {
- /* we advance before query_begin ! */
- q->offset -= q->rotate;
- q->data -= q->rotate / sizeof(*q->data);
- } else
- if (!q->is64bit)
- q->data[0] = 0; /* initialize sequence */
+ q = nvc0_hw_create_query(nvc0, type, index);
return (struct pipe_query *)q;
}
static void
-nvc0_query_get(struct nouveau_pushbuf *push, struct nvc0_query *q,
- unsigned offset, uint32_t get)
+nvc0_destroy_query(struct pipe_context *pipe, struct pipe_query *pq)
{
- offset += q->offset;
-
- PUSH_SPACE(push, 5);
- PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
- BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 4);
- PUSH_DATAh(push, q->bo->offset + offset);
- PUSH_DATA (push, q->bo->offset + offset);
- PUSH_DATA (push, q->sequence);
- PUSH_DATA (push, get);
-}
-
-static void
-nvc0_query_rotate(struct nvc0_context *nvc0, struct nvc0_query *q)
-{
- q->offset += q->rotate;
- q->data += q->rotate / sizeof(*q->data);
- if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE)
- nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE);
+ struct nvc0_query *q = nvc0_query(pq);
+ q->funcs->destroy_query(nvc0_context(pipe), q);
}
static boolean
-nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
+nvc0_begin_query(struct pipe_context *pipe, struct pipe_query *pq)
{
- struct nvc0_context *nvc0 = nvc0_context(pipe);
- struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nvc0_query *q = nvc0_query(pq);
- bool ret = true;
-
- /* For occlusion queries we have to change the storage, because a previous
- * query might set the initial render conition to false even *after* we re-
- * initialized it to true.
- */
- if (q->rotate) {
- nvc0_query_rotate(nvc0, q);
-
- /* XXX: can we do this with the GPU, and sync with respect to a previous
- * query ?
- */
- q->data[0] = q->sequence; /* initialize sequence */
- q->data[1] = 1; /* initial render condition = true */
- q->data[4] = q->sequence + 1; /* for comparison COND_MODE */
- q->data[5] = 0;
- }
- q->sequence++;
-
- switch (q->type) {
- case PIPE_QUERY_OCCLUSION_COUNTER:
- case PIPE_QUERY_OCCLUSION_PREDICATE:
- q->nesting = nvc0->screen->num_occlusion_queries_active++;
- if (q->nesting) {
- nvc0_query_get(push, q, 0x10, 0x0100f002);
- } else {
- PUSH_SPACE(push, 3);
- BEGIN_NVC0(push, NVC0_3D(COUNTER_RESET), 1);
- PUSH_DATA (push, NVC0_3D_COUNTER_RESET_SAMPLECNT);
- IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 1);
- }
- break;
- case PIPE_QUERY_PRIMITIVES_GENERATED:
- nvc0_query_get(push, q, 0x10, 0x09005002 | (q->index << 5));
- break;
- case PIPE_QUERY_PRIMITIVES_EMITTED:
- nvc0_query_get(push, q, 0x10, 0x05805002 | (q->index << 5));
- break;
- case PIPE_QUERY_SO_STATISTICS:
- nvc0_query_get(push, q, 0x20, 0x05805002 | (q->index << 5));
- nvc0_query_get(push, q, 0x30, 0x06805002 | (q->index << 5));
- break;
- case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
- nvc0_query_get(push, q, 0x10, 0x03005002 | (q->index << 5));
- break;
- case PIPE_QUERY_TIME_ELAPSED:
- nvc0_query_get(push, q, 0x10, 0x00005002);
- break;
- case PIPE_QUERY_PIPELINE_STATISTICS:
- nvc0_query_get(push, q, 0xc0 + 0x00, 0x00801002); /* VFETCH, VERTICES */
- nvc0_query_get(push, q, 0xc0 + 0x10, 0x01801002); /* VFETCH, PRIMS */
- nvc0_query_get(push, q, 0xc0 + 0x20, 0x02802002); /* VP, LAUNCHES */
- nvc0_query_get(push, q, 0xc0 + 0x30, 0x03806002); /* GP, LAUNCHES */
- nvc0_query_get(push, q, 0xc0 + 0x40, 0x04806002); /* GP, PRIMS_OUT */
- nvc0_query_get(push, q, 0xc0 + 0x50, 0x07804002); /* RAST, PRIMS_IN */
- nvc0_query_get(push, q, 0xc0 + 0x60, 0x08804002); /* RAST, PRIMS_OUT */
- nvc0_query_get(push, q, 0xc0 + 0x70, 0x0980a002); /* ROP, PIXELS */
- nvc0_query_get(push, q, 0xc0 + 0x80, 0x0d808002); /* TCP, LAUNCHES */
- nvc0_query_get(push, q, 0xc0 + 0x90, 0x0e809002); /* TEP, LAUNCHES */
- break;
- default:
-#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
- if (q->type >= NVC0_QUERY_DRV_STAT(0) &&
- q->type <= NVC0_QUERY_DRV_STAT_LAST) {
- if (q->index >= 5)
- q->u.value = nvc0->screen->base.stats.v[q->index];
- else
- q->u.value = 0;
- } else
-#endif
- if ((q->type >= NVE4_HW_SM_QUERY(0) && q->type <= NVE4_HW_SM_QUERY_LAST) ||
- (q->type >= NVC0_HW_SM_QUERY(0) && q->type <= NVC0_HW_SM_QUERY_LAST)) {
- ret = nvc0_hw_sm_query_begin(nvc0, q);
- }
- break;
- }
- q->state = NVC0_QUERY_STATE_ACTIVE;
- return ret;
+ return q->funcs->begin_query(nvc0_context(pipe), q);
}
static void
-nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
+nvc0_end_query(struct pipe_context *pipe, struct pipe_query *pq)
{
- struct nvc0_context *nvc0 = nvc0_context(pipe);
- struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nvc0_query *q = nvc0_query(pq);
-
- if (q->state != NVC0_QUERY_STATE_ACTIVE) {
- /* some queries don't require 'begin' to be called (e.g. GPU_FINISHED) */
- if (q->rotate)
- nvc0_query_rotate(nvc0, q);
- q->sequence++;
- }
- q->state = NVC0_QUERY_STATE_ENDED;
-
- switch (q->type) {
- case PIPE_QUERY_OCCLUSION_COUNTER:
- case PIPE_QUERY_OCCLUSION_PREDICATE:
- nvc0_query_get(push, q, 0, 0x0100f002);
- if (--nvc0->screen->num_occlusion_queries_active == 0) {
- PUSH_SPACE(push, 1);
- IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 0);
- }
- break;
- case PIPE_QUERY_PRIMITIVES_GENERATED:
- nvc0_query_get(push, q, 0, 0x09005002 | (q->index << 5));
- break;
- case PIPE_QUERY_PRIMITIVES_EMITTED:
- nvc0_query_get(push, q, 0, 0x05805002 | (q->index << 5));
- break;
- case PIPE_QUERY_SO_STATISTICS:
- nvc0_query_get(push, q, 0x00, 0x05805002 | (q->index << 5));
- nvc0_query_get(push, q, 0x10, 0x06805002 | (q->index << 5));
- break;
- case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
- /* TODO: How do we sum over all streams for render condition ? */
- /* PRIMS_DROPPED doesn't write sequence, use a ZERO query to sync on */
- nvc0_query_get(push, q, 0x00, 0x03005002 | (q->index << 5));
- nvc0_query_get(push, q, 0x20, 0x00005002);
- break;
- case PIPE_QUERY_TIMESTAMP:
- case PIPE_QUERY_TIME_ELAPSED:
- nvc0_query_get(push, q, 0, 0x00005002);
- break;
- case PIPE_QUERY_GPU_FINISHED:
- nvc0_query_get(push, q, 0, 0x1000f010);
- break;
- case PIPE_QUERY_PIPELINE_STATISTICS:
- nvc0_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
- nvc0_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
- nvc0_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */
- nvc0_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */
- nvc0_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
- nvc0_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
- nvc0_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
- nvc0_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */
- nvc0_query_get(push, q, 0x80, 0x0d808002); /* TCP, LAUNCHES */
- nvc0_query_get(push, q, 0x90, 0x0e809002); /* TEP, LAUNCHES */
- break;
- case NVC0_QUERY_TFB_BUFFER_OFFSET:
- /* indexed by TFB buffer instead of by vertex stream */
- nvc0_query_get(push, q, 0x00, 0x0d005002 | (q->index << 5));
- break;
- case PIPE_QUERY_TIMESTAMP_DISJOINT:
- /* This query is not issued on GPU because disjoint is forced to false */
- q->state = NVC0_QUERY_STATE_READY;
- break;
- default:
-#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
- if (q->type >= NVC0_QUERY_DRV_STAT(0) &&
- q->type <= NVC0_QUERY_DRV_STAT_LAST) {
- q->u.value = nvc0->screen->base.stats.v[q->index] - q->u.value;
- return;
- } else
-#endif
- if ((q->type >= NVE4_HW_SM_QUERY(0) && q->type <= NVE4_HW_SM_QUERY_LAST) ||
- (q->type >= NVC0_HW_SM_QUERY(0) && q->type <= NVC0_HW_SM_QUERY_LAST)) {
- nvc0_hw_sm_query_end(nvc0, q);
- }
- break;
- }
- if (q->is64bit)
- nouveau_fence_ref(nvc0->screen->base.fence.current, &q->fence);
-}
-
-static inline void
-nvc0_query_update(struct nouveau_client *cli, struct nvc0_query *q)
-{
- if (q->is64bit) {
- if (nouveau_fence_signalled(q->fence))
- q->state = NVC0_QUERY_STATE_READY;
- } else {
- if (q->data[0] == q->sequence)
- q->state = NVC0_QUERY_STATE_READY;
- }
+ q->funcs->end_query(nvc0_context(pipe), q);
}
static boolean
-nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
- boolean wait, union pipe_query_result *result)
+nvc0_get_query_result(struct pipe_context *pipe, struct pipe_query *pq,
+ boolean wait, union pipe_query_result *result)
{
- struct nvc0_context *nvc0 = nvc0_context(pipe);
struct nvc0_query *q = nvc0_query(pq);
- uint64_t *res64 = (uint64_t*)result;
- uint32_t *res32 = (uint32_t*)result;
- uint8_t *res8 = (uint8_t*)result;
- uint64_t *data64 = (uint64_t *)q->data;
- unsigned i;
-
-#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
- if (q->type >= NVC0_QUERY_DRV_STAT(0) &&
- q->type <= NVC0_QUERY_DRV_STAT_LAST) {
- res64[0] = q->u.value;
- return true;
- } else
-#endif
- if ((q->type >= NVE4_HW_SM_QUERY(0) && q->type <= NVE4_HW_SM_QUERY_LAST) ||
- (q->type >= NVC0_HW_SM_QUERY(0) && q->type <= NVC0_HW_SM_QUERY_LAST)) {
- return nvc0_hw_sm_query_result(nvc0, q, result, wait);
- }
-
- if (q->state != NVC0_QUERY_STATE_READY)
- nvc0_query_update(nvc0->screen->base.client, q);
-
- if (q->state != NVC0_QUERY_STATE_READY) {
- if (!wait) {
- if (q->state != NVC0_QUERY_STATE_FLUSHED) {
- q->state = NVC0_QUERY_STATE_FLUSHED;
- /* flush for silly apps that spin on GL_QUERY_RESULT_AVAILABLE */
- PUSH_KICK(nvc0->base.pushbuf);
- }
- return false;
- }
- if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->screen->base.client))
- return false;
- NOUVEAU_DRV_STAT(&nvc0->screen->base, query_sync_count, 1);
- }
- q->state = NVC0_QUERY_STATE_READY;
-
- switch (q->type) {
- case PIPE_QUERY_GPU_FINISHED:
- res8[0] = true;
- break;
- case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
- res64[0] = q->data[1] - q->data[5];
- break;
- case PIPE_QUERY_OCCLUSION_PREDICATE:
- res8[0] = q->data[1] != q->data[5];
- break;
- case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
- case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
- res64[0] = data64[0] - data64[2];
- break;
- case PIPE_QUERY_SO_STATISTICS:
- res64[0] = data64[0] - data64[4];
- res64[1] = data64[2] - data64[6];
- break;
- case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
- res8[0] = data64[0] != data64[2];
- break;
- case PIPE_QUERY_TIMESTAMP:
- res64[0] = data64[1];
- break;
- case PIPE_QUERY_TIMESTAMP_DISJOINT:
- res64[0] = 1000000000;
- res8[8] = false;
- break;
- case PIPE_QUERY_TIME_ELAPSED:
- res64[0] = data64[1] - data64[3];
- break;
- case PIPE_QUERY_PIPELINE_STATISTICS:
- for (i = 0; i < 10; ++i)
- res64[i] = data64[i * 2] - data64[24 + i * 2];
- break;
- case NVC0_QUERY_TFB_BUFFER_OFFSET:
- res32[0] = q->data[1];
- break;
- default:
- assert(0); /* can't happen, we don't create queries with invalid type */
- return false;
- }
-
- return true;
-}
-
-void
-nvc0_query_fifo_wait(struct nouveau_pushbuf *push, struct pipe_query *pq)
-{
- struct nvc0_query *q = nvc0_query(pq);
- unsigned offset = q->offset;
-
- if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) offset += 0x20;
-
- PUSH_SPACE(push, 5);
- PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
- BEGIN_NVC0(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
- PUSH_DATAh(push, q->bo->offset + offset);
- PUSH_DATA (push, q->bo->offset + offset);
- PUSH_DATA (push, q->sequence);
- PUSH_DATA (push, (1 << 12) |
- NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
+ return q->funcs->get_query_result(nvc0_context(pipe), q, wait, result);
}
static void
@@ -547,7 +79,8 @@ nvc0_render_condition(struct pipe_context *pipe,
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
- struct nvc0_query *q;
+ struct nvc0_query *q = nvc0_query(pq);
+ struct nvc0_hw_query *hq = nvc0_hw_query(q);
uint32_t cond;
bool wait =
mode != PIPE_RENDER_COND_NO_WAIT &&
@@ -557,7 +90,6 @@ nvc0_render_condition(struct pipe_context *pipe,
cond = NVC0_3D_COND_MODE_ALWAYS;
}
else {
- q = nvc0_query(pq);
/* NOTE: comparison of 2 queries only works if both have completed */
switch (q->type) {
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
@@ -568,7 +100,7 @@ nvc0_render_condition(struct pipe_context *pipe,
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
if (likely(!condition)) {
- if (unlikely(q->nesting))
+ if (unlikely(hq->nesting))
cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL :
NVC0_3D_COND_MODE_ALWAYS;
else
@@ -596,805 +128,17 @@ nvc0_render_condition(struct pipe_context *pipe,
}
if (wait)
- nvc0_query_fifo_wait(push, pq);
+ nvc0_hw_query_fifo_wait(push, q);
PUSH_SPACE(push, 7);
- PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
BEGIN_NVC0(push, NVC0_3D(COND_ADDRESS_HIGH), 3);
- PUSH_DATAh(push, q->bo->offset + q->offset);
- PUSH_DATA (push, q->bo->offset + q->offset);
+ PUSH_DATAh(push, hq->bo->offset + hq->offset);
+ PUSH_DATA (push, hq->bo->offset + hq->offset);
PUSH_DATA (push, cond);
BEGIN_NVC0(push, NVC0_2D(COND_ADDRESS_HIGH), 2);
- PUSH_DATAh(push, q->bo->offset + q->offset);
- PUSH_DATA (push, q->bo->offset + q->offset);
-}
-
-void
-nvc0_query_pushbuf_submit(struct nouveau_pushbuf *push,
- struct pipe_query *pq, unsigned result_offset)
-{
- struct nvc0_query *q = nvc0_query(pq);
-
-#define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8))
-
- PUSH_REFN(push, q->bo, NOUVEAU_BO_RD | NOUVEAU_BO_GART);
- nouveau_pushbuf_space(push, 0, 0, 1);
- nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 |
- NVC0_IB_ENTRY_1_NO_PREFETCH);
-}
-
-void
-nvc0_so_target_save_offset(struct pipe_context *pipe,
- struct pipe_stream_output_target *ptarg,
- unsigned index, bool *serialize)
-{
- struct nvc0_so_target *targ = nvc0_so_target(ptarg);
-
- if (*serialize) {
- *serialize = false;
- PUSH_SPACE(nvc0_context(pipe)->base.pushbuf, 1);
- IMMED_NVC0(nvc0_context(pipe)->base.pushbuf, NVC0_3D(SERIALIZE), 0);
-
- NOUVEAU_DRV_STAT(nouveau_screen(pipe->screen), gpu_serialize_count, 1);
- }
-
- nvc0_query(targ->pq)->index = index;
-
- nvc0_query_end(pipe, targ->pq);
-}
-
-
-/* === DRIVER STATISTICS === */
-
-#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
-
-static const char *nvc0_drv_stat_names[] =
-{
- "drv-tex_obj_current_count",
- "drv-tex_obj_current_bytes",
- "drv-buf_obj_current_count",
- "drv-buf_obj_current_bytes_vid",
- "drv-buf_obj_current_bytes_sys",
- "drv-tex_transfers_rd",
- "drv-tex_transfers_wr",
- "drv-tex_copy_count",
- "drv-tex_blit_count",
- "drv-tex_cache_flush_count",
- "drv-buf_transfers_rd",
- "drv-buf_transfers_wr",
- "drv-buf_read_bytes_staging_vid",
- "drv-buf_write_bytes_direct",
- "drv-buf_write_bytes_staging_vid",
- "drv-buf_write_bytes_staging_sys",
- "drv-buf_copy_bytes",
- "drv-buf_non_kernel_fence_sync_count",
- "drv-any_non_kernel_fence_sync_count",
- "drv-query_sync_count",
- "drv-gpu_serialize_count",
- "drv-draw_calls_array",
- "drv-draw_calls_indexed",
- "drv-draw_calls_fallback_count",
- "drv-user_buffer_upload_bytes",
- "drv-constbuf_upload_count",
- "drv-constbuf_upload_bytes",
- "drv-pushbuf_count",
- "drv-resource_validate_count"
-};
-
-#endif /* NOUVEAU_ENABLE_DRIVER_STATISTICS */
-
-
-/* === PERFORMANCE MONITORING COUNTERS for NVE4+ === */
-
-/* Code to read out MP counters: They are accessible via mmio, too, but let's
- * just avoid mapping registers in userspace. We'd have to know which MPs are
- * enabled/present, too, and that information is not presently exposed.
- * We could add a kernel interface for it, but reading the counters like this
- * has the advantage of being async (if get_result isn't called immediately).
- */
-static const uint64_t nve4_read_hw_sm_counters_code[] =
-{
- /* sched 0x20 0x20 0x20 0x20 0x20 0x20 0x20
- * mov b32 $r8 $tidx
- * mov b32 $r12 $physid
- * mov b32 $r0 $pm0
- * mov b32 $r1 $pm1
- * mov b32 $r2 $pm2
- * mov b32 $r3 $pm3
- * mov b32 $r4 $pm4
- * sched 0x20 0x20 0x23 0x04 0x20 0x04 0x2b
- * mov b32 $r5 $pm5
- * mov b32 $r6 $pm6
- * mov b32 $r7 $pm7
- * set $p0 0x1 eq u32 $r8 0x0
- * mov b32 $r10 c0[0x0]
- * ext u32 $r8 $r12 0x414
- * mov b32 $r11 c0[0x4]
- * sched 0x04 0x2e 0x04 0x20 0x20 0x28 0x04
- * ext u32 $r9 $r12 0x208
- * (not $p0) exit
- * set $p1 0x1 eq u32 $r9 0x0
- * mul $r8 u32 $r8 u32 96
- * mul $r12 u32 $r9 u32 16
- * mul $r13 u32 $r9 u32 4
- * add b32 $r9 $r8 $r13
- * sched 0x28 0x04 0x2c 0x04 0x2c 0x04 0x2c
- * add b32 $r8 $r8 $r12
- * mov b32 $r12 $r10
- * add b32 $r10 $c $r10 $r8
- * mov b32 $r13 $r11
- * add b32 $r11 $r11 0x0 $c
- * add b32 $r12 $c $r12 $r9
- * st b128 wt g[$r10d] $r0q
- * sched 0x4 0x2c 0x20 0x04 0x2e 0x00 0x00
- * mov b32 $r0 c0[0x8]
- * add b32 $r13 $r13 0x0 $c
- * $p1 st b128 wt g[$r12d+0x40] $r4q
- * st b32 wt g[$r12d+0x50] $r0
- * exit */
- 0x2202020202020207ULL,
- 0x2c00000084021c04ULL,
- 0x2c0000000c031c04ULL,
- 0x2c00000010001c04ULL,
- 0x2c00000014005c04ULL,
- 0x2c00000018009c04ULL,
- 0x2c0000001c00dc04ULL,
- 0x2c00000020011c04ULL,
- 0x22b0420042320207ULL,
- 0x2c00000024015c04ULL,
- 0x2c00000028019c04ULL,
- 0x2c0000002c01dc04ULL,
- 0x190e0000fc81dc03ULL,
- 0x2800400000029de4ULL,
- 0x7000c01050c21c03ULL,
- 0x280040001002dde4ULL,
- 0x204282020042e047ULL,
- 0x7000c00820c25c03ULL,
- 0x80000000000021e7ULL,
- 0x190e0000fc93dc03ULL,
- 0x1000000180821c02ULL,
- 0x1000000040931c02ULL,
- 0x1000000010935c02ULL,
- 0x4800000034825c03ULL,
- 0x22c042c042c04287ULL,
- 0x4800000030821c03ULL,
- 0x2800000028031de4ULL,
- 0x4801000020a29c03ULL,
- 0x280000002c035de4ULL,
- 0x0800000000b2dc42ULL,
- 0x4801000024c31c03ULL,
- 0x9400000000a01fc5ULL,
- 0x200002e04202c047ULL,
- 0x2800400020001de4ULL,
- 0x0800000000d35c42ULL,
- 0x9400000100c107c5ULL,
- 0x9400000140c01f85ULL,
- 0x8000000000001de7ULL
-};
-
-/* NOTE: intentionally using the same names as NV */
-static const char *nve4_pm_query_names[] =
-{
- /* MP counters */
- "active_cycles",
- "active_warps",
- "atom_count",
- "branch",
- "divergent_branch",
- "gld_request",
- "global_ld_mem_divergence_replays",
- "global_store_transaction",
- "global_st_mem_divergence_replays",
- "gred_count",
- "gst_request",
- "inst_executed",
- "inst_issued",
- "inst_issued1",
- "inst_issued2",
- "l1_global_load_hit",
- "l1_global_load_miss",
- "l1_local_load_hit",
- "l1_local_load_miss",
- "l1_local_store_hit",
- "l1_local_store_miss",
- "l1_shared_load_transactions",
- "l1_shared_store_transactions",
- "local_load",
- "local_load_transactions",
- "local_store",
- "local_store_transactions",
- "prof_trigger_00",
- "prof_trigger_01",
- "prof_trigger_02",
- "prof_trigger_03",
- "prof_trigger_04",
- "prof_trigger_05",
- "prof_trigger_06",
- "prof_trigger_07",
- "shared_load",
- "shared_load_replay",
- "shared_store",
- "shared_store_replay",
- "sm_cta_launched",
- "threads_launched",
- "uncached_global_load_transaction",
- "warps_launched",
- /* metrics, i.e. functions of the MP counters */
- "metric-ipc", /* inst_executed, clock */
- "metric-ipac", /* inst_executed, active_cycles */
- "metric-ipec", /* inst_executed, (bool)inst_executed */
- "metric-achieved_occupancy", /* active_warps, active_cycles */
- "metric-sm_efficiency", /* active_cycles, clock */
- "metric-inst_replay_overhead" /* inst_issued, inst_executed */
-};
-
-/* For simplicity, we will allocate as many group slots as we allocate counter
- * slots. This means that a single counter which wants to source from 2 groups
- * will have to be declared as using 2 counter slots. This shouldn't really be
- * a problem because such queries don't make much sense ... (unless someone is
- * really creative).
- */
-struct nvc0_mp_counter_cfg
-{
- uint32_t func : 16; /* mask or 4-bit logic op (depending on mode) */
- uint32_t mode : 4; /* LOGOP,B6,LOGOP_B6(_PULSE) */
- uint32_t num_src : 3; /* number of sources (1 - 6, only for NVC0:NVE4) */
- uint32_t sig_dom : 1; /* if 0, MP_PM_A (per warp-sched), if 1, MP_PM_B */
- uint32_t sig_sel : 8; /* signal group */
- uint64_t src_sel; /* signal selection for up to 6 sources (48 bit) */
-};
-
-#define NVC0_COUNTER_OPn_SUM 0
-#define NVC0_COUNTER_OPn_OR 1
-#define NVC0_COUNTER_OPn_AND 2
-#define NVC0_COUNTER_OP2_REL_SUM_MM 3 /* (sum(ctr0) - sum(ctr1)) / sum(ctr0) */
-#define NVC0_COUNTER_OP2_DIV_SUM_M0 4 /* sum(ctr0) / ctr1 of MP[0]) */
-#define NVC0_COUNTER_OP2_AVG_DIV_MM 5 /* avg(ctr0 / ctr1) */
-#define NVC0_COUNTER_OP2_AVG_DIV_M0 6 /* avg(ctr0) / ctr1 of MP[0]) */
-
-struct nvc0_hw_sm_query_cfg
-{
- struct nvc0_mp_counter_cfg ctr[4];
- uint8_t num_counters;
- uint8_t op;
- uint8_t norm[2]; /* normalization num,denom */
-};
-
-#define _Q1A(n, f, m, g, s, nu, dn) [NVE4_HW_SM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
-#define _Q1B(n, f, m, g, s, nu, dn) [NVE4_HW_SM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
-#define _M2A(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_HW_SM_QUERY_METRIC_##n] = { { \
- { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \
- { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g1, s1 }, \
- {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
-#define _M2B(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_HW_SM_QUERY_METRIC_##n] = { { \
- { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g0, s0 }, \
- { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \
- {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
-#define _M2AB(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_HW_SM_QUERY_METRIC_##n] = { { \
- { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \
- { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \
- {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
-
-/* NOTES:
- * active_warps: bit 0 alternates btw 0 and 1 for odd nr of warps
- * inst_executed etc.: we only count a single warp scheduler
- * metric-ipXc: we simply multiply by 4 to account for the 4 warp schedulers;
- * this is inaccurate !
- */
-static const struct nvc0_hw_sm_query_cfg nve4_hw_sm_queries[] =
-{
- _Q1B(ACTIVE_CYCLES, 0x0001, B6, WARP, 0x00000000, 1, 1),
- _Q1B(ACTIVE_WARPS, 0x003f, B6, WARP, 0x31483104, 2, 1),
- _Q1A(ATOM_COUNT, 0x0001, B6, BRANCH, 0x00000000, 1, 1),
- _Q1A(BRANCH, 0x0001, B6, BRANCH, 0x0000000c, 1, 1),
- _Q1A(DIVERGENT_BRANCH, 0x0001, B6, BRANCH, 0x00000010, 1, 1),
- _Q1A(GLD_REQUEST, 0x0001, B6, LDST, 0x00000010, 1, 1),
- _Q1B(GLD_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000010, 1, 1),
- _Q1B(GST_TRANSACTIONS, 0x0001, B6, MEM, 0x00000004, 1, 1),
- _Q1B(GST_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000014, 1, 1),
- _Q1A(GRED_COUNT, 0x0001, B6, BRANCH, 0x00000008, 1, 1),
- _Q1A(GST_REQUEST, 0x0001, B6, LDST, 0x00000014, 1, 1),
- _Q1A(INST_EXECUTED, 0x0003, B6, EXEC, 0x00000398, 1, 1),
- _Q1A(INST_ISSUED, 0x0003, B6, ISSUE, 0x00000104, 1, 1),
- _Q1A(INST_ISSUED1, 0x0001, B6, ISSUE, 0x00000004, 1, 1),
- _Q1A(INST_ISSUED2, 0x0001, B6, ISSUE, 0x00000008, 1, 1),
- _Q1B(L1_GLD_HIT, 0x0001, B6, L1, 0x00000010, 1, 1),
- _Q1B(L1_GLD_MISS, 0x0001, B6, L1, 0x00000014, 1, 1),
- _Q1B(L1_LOCAL_LD_HIT, 0x0001, B6, L1, 0x00000000, 1, 1),
- _Q1B(L1_LOCAL_LD_MISS, 0x0001, B6, L1, 0x00000004, 1, 1),
- _Q1B(L1_LOCAL_ST_HIT, 0x0001, B6, L1, 0x00000008, 1, 1),
- _Q1B(L1_LOCAL_ST_MISS, 0x0001, B6, L1, 0x0000000c, 1, 1),
- _Q1B(L1_SHARED_LD_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000008, 1, 1),
- _Q1B(L1_SHARED_ST_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x0000000c, 1, 1),
- _Q1A(LOCAL_LD, 0x0001, B6, LDST, 0x00000008, 1, 1),
- _Q1B(LOCAL_LD_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000000, 1, 1),
- _Q1A(LOCAL_ST, 0x0001, B6, LDST, 0x0000000c, 1, 1),
- _Q1B(LOCAL_ST_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000004, 1, 1),
- _Q1A(PROF_TRIGGER_0, 0x0001, B6, USER, 0x00000000, 1, 1),
- _Q1A(PROF_TRIGGER_1, 0x0001, B6, USER, 0x00000004, 1, 1),
- _Q1A(PROF_TRIGGER_2, 0x0001, B6, USER, 0x00000008, 1, 1),
- _Q1A(PROF_TRIGGER_3, 0x0001, B6, USER, 0x0000000c, 1, 1),
- _Q1A(PROF_TRIGGER_4, 0x0001, B6, USER, 0x00000010, 1, 1),
- _Q1A(PROF_TRIGGER_5, 0x0001, B6, USER, 0x00000014, 1, 1),
- _Q1A(PROF_TRIGGER_6, 0x0001, B6, USER, 0x00000018, 1, 1),
- _Q1A(PROF_TRIGGER_7, 0x0001, B6, USER, 0x0000001c, 1, 1),
- _Q1A(SHARED_LD, 0x0001, B6, LDST, 0x00000000, 1, 1),
- _Q1B(SHARED_LD_REPLAY, 0x0001, B6, REPLAY, 0x00000008, 1, 1),
- _Q1A(SHARED_ST, 0x0001, B6, LDST, 0x00000004, 1, 1),
- _Q1B(SHARED_ST_REPLAY, 0x0001, B6, REPLAY, 0x0000000c, 1, 1),
- _Q1B(SM_CTA_LAUNCHED, 0x0001, B6, WARP, 0x0000001c, 1, 1),
- _Q1A(THREADS_LAUNCHED, 0x003f, B6, LAUNCH, 0x398a4188, 1, 1),
- _Q1B(UNCACHED_GLD_TRANSACTIONS, 0x0001, B6, MEM, 0x00000000, 1, 1),
- _Q1A(WARPS_LAUNCHED, 0x0001, B6, LAUNCH, 0x00000004, 1, 1),
- _M2AB(IPC, 0x3, B6, EXEC, 0x398, 0xffff, LOGOP, WARP, 0x0, DIV_SUM_M0, 10, 1),
- _M2AB(IPAC, 0x3, B6, EXEC, 0x398, 0x1, B6, WARP, 0x0, AVG_DIV_MM, 10, 1),
- _M2A(IPEC, 0x3, B6, EXEC, 0x398, 0xe, LOGOP, EXEC, 0x398, AVG_DIV_MM, 10, 1),
- _M2A(INST_REPLAY_OHEAD, 0x3, B6, ISSUE, 0x104, 0x3, B6, EXEC, 0x398, REL_SUM_MM, 100, 1),
- _M2B(MP_OCCUPANCY, 0x3f, B6, WARP, 0x31483104, 0x01, B6, WARP, 0x0, AVG_DIV_MM, 200, 64),
- _M2B(MP_EFFICIENCY, 0x01, B6, WARP, 0x0, 0xffff, LOGOP, WARP, 0x0, AVG_DIV_M0, 100, 1),
-};
-
-#undef _Q1A
-#undef _Q1B
-#undef _M2A
-#undef _M2B
-
-/* === PERFORMANCE MONITORING COUNTERS for NVC0:NVE4 === */
-static const uint64_t nvc0_read_hw_sm_counters_code[] =
-{
- /* mov b32 $r8 $tidx
- * mov b32 $r9 $physid
- * mov b32 $r0 $pm0
- * mov b32 $r1 $pm1
- * mov b32 $r2 $pm2
- * mov b32 $r3 $pm3
- * mov b32 $r4 $pm4
- * mov b32 $r5 $pm5
- * mov b32 $r6 $pm6
- * mov b32 $r7 $pm7
- * set $p0 0x1 eq u32 $r8 0x0
- * mov b32 $r10 c0[0x0]
- * mov b32 $r11 c0[0x4]
- * ext u32 $r8 $r9 0x414
- * (not $p0) exit
- * mul $r8 u32 $r8 u32 36
- * add b32 $r10 $c $r10 $r8
- * add b32 $r11 $r11 0x0 $c
- * mov b32 $r8 c0[0x8]
- * st b128 wt g[$r10d+0x00] $r0q
- * st b128 wt g[$r10d+0x10] $r4q
- * st b32 wt g[$r10d+0x20] $r8
- * exit */
- 0x2c00000084021c04ULL,
- 0x2c0000000c025c04ULL,
- 0x2c00000010001c04ULL,
- 0x2c00000014005c04ULL,
- 0x2c00000018009c04ULL,
- 0x2c0000001c00dc04ULL,
- 0x2c00000020011c04ULL,
- 0x2c00000024015c04ULL,
- 0x2c00000028019c04ULL,
- 0x2c0000002c01dc04ULL,
- 0x190e0000fc81dc03ULL,
- 0x2800400000029de4ULL,
- 0x280040001002dde4ULL,
- 0x7000c01050921c03ULL,
- 0x80000000000021e7ULL,
- 0x1000000090821c02ULL,
- 0x4801000020a29c03ULL,
- 0x0800000000b2dc42ULL,
- 0x2800400020021de4ULL,
- 0x9400000000a01fc5ULL,
- 0x9400000040a11fc5ULL,
- 0x9400000080a21f85ULL,
- 0x8000000000001de7ULL
-};
-
-static const char *nvc0_pm_query_names[] =
-{
- /* MP counters */
- "active_cycles",
- "active_warps",
- "atom_count",
- "branch",
- "divergent_branch",
- "gld_request",
- "gred_count",
- "gst_request",
- "inst_executed",
- "inst_issued1_0",
- "inst_issued1_1",
- "inst_issued2_0",
- "inst_issued2_1",
- "local_load",
- "local_store",
- "prof_trigger_00",
- "prof_trigger_01",
- "prof_trigger_02",
- "prof_trigger_03",
- "prof_trigger_04",
- "prof_trigger_05",
- "prof_trigger_06",
- "prof_trigger_07",
- "shared_load",
- "shared_store",
- "threads_launched",
- "thread_inst_executed_0",
- "thread_inst_executed_1",
- "thread_inst_executed_2",
- "thread_inst_executed_3",
- "warps_launched",
-};
-
-#define _Q(n, f, m, g, c, s0, s1, s2, s3, s4, s5) [NVC0_HW_SM_QUERY_##n] = { { { f, NVC0_COMPUTE_MP_PM_OP_MODE_##m, c, 0, g, s0|(s1 << 8)|(s2 << 16)|(s3 << 24)|(s4##ULL << 32)|(s5##ULL << 40) }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { 1, 1 } }
-
-static const struct nvc0_hw_sm_query_cfg nvc0_hw_sm_queries[] =
-{
- _Q(ACTIVE_CYCLES, 0xaaaa, LOGOP, 0x11, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
- _Q(ACTIVE_WARPS, 0xaaaa, LOGOP, 0x24, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
- _Q(ATOM_COUNT, 0xaaaa, LOGOP, 0x63, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
- _Q(BRANCH, 0xaaaa, LOGOP, 0x1a, 2, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00),
- _Q(DIVERGENT_BRANCH, 0xaaaa, LOGOP, 0x19, 2, 0x20, 0x31, 0x00, 0x00, 0x00, 0x00),
- _Q(GLD_REQUEST, 0xaaaa, LOGOP, 0x64, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
- _Q(GRED_COUNT, 0xaaaa, LOGOP, 0x63, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
- _Q(GST_REQUEST, 0xaaaa, LOGOP, 0x64, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00),
- _Q(INST_EXECUTED, 0xaaaa, LOGOP, 0x2d, 3, 0x00, 0x11, 0x22, 0x00, 0x00, 0x00),
- _Q(INST_ISSUED1_0, 0xaaaa, LOGOP, 0x7e, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
- _Q(INST_ISSUED1_1, 0xaaaa, LOGOP, 0x7e, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
- _Q(INST_ISSUED2_0, 0xaaaa, LOGOP, 0x7e, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
- _Q(INST_ISSUED2_1, 0xaaaa, LOGOP, 0x7e, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
- _Q(LOCAL_LD, 0xaaaa, LOGOP, 0x64, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
- _Q(LOCAL_ST, 0xaaaa, LOGOP, 0x64, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
- _Q(PROF_TRIGGER_0, 0xaaaa, LOGOP, 0x01, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
- _Q(PROF_TRIGGER_1, 0xaaaa, LOGOP, 0x01, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
- _Q(PROF_TRIGGER_2, 0xaaaa, LOGOP, 0x01, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
- _Q(PROF_TRIGGER_3, 0xaaaa, LOGOP, 0x01, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
- _Q(PROF_TRIGGER_4, 0xaaaa, LOGOP, 0x01, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
- _Q(PROF_TRIGGER_5, 0xaaaa, LOGOP, 0x01, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
- _Q(PROF_TRIGGER_6, 0xaaaa, LOGOP, 0x01, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00),
- _Q(PROF_TRIGGER_7, 0xaaaa, LOGOP, 0x01, 1, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00),
- _Q(SHARED_LD, 0xaaaa, LOGOP, 0x64, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
- _Q(SHARED_ST, 0xaaaa, LOGOP, 0x64, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
- _Q(THREADS_LAUNCHED, 0xaaaa, LOGOP, 0x26, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
- _Q(TH_INST_EXECUTED_0, 0xaaaa, LOGOP, 0xa3, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
- _Q(TH_INST_EXECUTED_1, 0xaaaa, LOGOP, 0xa5, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
- _Q(TH_INST_EXECUTED_2, 0xaaaa, LOGOP, 0xa4, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
- _Q(TH_INST_EXECUTED_3, 0xaaaa, LOGOP, 0xa6, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
- _Q(WARPS_LAUNCHED, 0xaaaa, LOGOP, 0x26, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
-};
-
-#undef _Q
-
-static const struct nvc0_hw_sm_query_cfg *
-nvc0_hw_sm_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_query *q)
-{
- struct nvc0_screen *screen = nvc0->screen;
-
- if (screen->base.class_3d >= NVE4_3D_CLASS)
- return &nve4_hw_sm_queries[q->type - PIPE_QUERY_DRIVER_SPECIFIC];
- return &nvc0_hw_sm_queries[q->type - NVC0_HW_SM_QUERY(0)];
-}
-
-boolean
-nvc0_hw_sm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q)
-{
- struct nvc0_screen *screen = nvc0->screen;
- struct nouveau_pushbuf *push = nvc0->base.pushbuf;
- const bool is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
- const struct nvc0_hw_sm_query_cfg *cfg;
- unsigned i, c;
- unsigned num_ab[2] = { 0, 0 };
-
- cfg = nvc0_hw_sm_query_get_cfg(nvc0, q);
-
- /* check if we have enough free counter slots */
- for (i = 0; i < cfg->num_counters; ++i)
- num_ab[cfg->ctr[i].sig_dom]++;
-
- if (screen->pm.num_hw_sm_active[0] + num_ab[0] > 4 ||
- screen->pm.num_hw_sm_active[1] + num_ab[1] > 4) {
- NOUVEAU_ERR("Not enough free MP counter slots !\n");
- return false;
- }
-
- assert(cfg->num_counters <= 4);
- PUSH_SPACE(push, 4 * 8 * (is_nve4 ? 1 : 6) + 6);
-
- if (!screen->pm.mp_counters_enabled) {
- screen->pm.mp_counters_enabled = true;
- BEGIN_NVC0(push, SUBC_SW(0x06ac), 1);
- PUSH_DATA (push, 0x1fcb);
- }
-
- /* set sequence field to 0 (used to check if result is available) */
- for (i = 0; i < screen->mp_count; ++i)
- q->data[i * 10 + 10] = 0;
-
- for (i = 0; i < cfg->num_counters; ++i) {
- const unsigned d = cfg->ctr[i].sig_dom;
-
- if (!screen->pm.num_hw_sm_active[d]) {
- uint32_t m = (1 << 22) | (1 << (7 + (8 * !d)));
- if (screen->pm.num_hw_sm_active[!d])
- m |= 1 << (7 + (8 * d));
- BEGIN_NVC0(push, SUBC_SW(0x0600), 1);
- PUSH_DATA (push, m);
- }
- screen->pm.num_hw_sm_active[d]++;
-
- for (c = d * 4; c < (d * 4 + 4); ++c) {
- if (!screen->pm.mp_counter[c]) {
- q->ctr[i] = c;
- screen->pm.mp_counter[c] = (struct pipe_query *)q;
- break;
- }
- }
- assert(c <= (d * 4 + 3)); /* must succeed, already checked for space */
-
- /* configure and reset the counter(s) */
- if (is_nve4) {
- if (d == 0)
- BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_A_SIGSEL(c & 3)), 1);
- else
- BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_B_SIGSEL(c & 3)), 1);
- PUSH_DATA (push, cfg->ctr[i].sig_sel);
- BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SRCSEL(c)), 1);
- PUSH_DATA (push, cfg->ctr[i].src_sel + 0x2108421 * (c & 3));
- BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 1);
- PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
- BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SET(c)), 1);
- PUSH_DATA (push, 0);
- } else {
- unsigned s;
-
- for (s = 0; s < cfg->ctr[i].num_src; s++) {
- BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SIGSEL(s)), 1);
- PUSH_DATA (push, cfg->ctr[i].sig_sel);
- BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SRCSEL(s)), 1);
- PUSH_DATA (push, (cfg->ctr[i].src_sel >> (s * 8)) & 0xff);
- BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(s)), 1);
- PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
- BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SET(s)), 1);
- PUSH_DATA (push, 0);
- }
- }
- }
- return true;
-}
-
-static void
-nvc0_hw_sm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
-{
- struct nvc0_screen *screen = nvc0->screen;
- struct pipe_context *pipe = &nvc0->base.pipe;
- struct nouveau_pushbuf *push = nvc0->base.pushbuf;
- const bool is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
- uint32_t mask;
- uint32_t input[3];
- const uint block[3] = { 32, is_nve4 ? 4 : 1, 1 };
- const uint grid[3] = { screen->mp_count, 1, 1 };
- unsigned c;
- const struct nvc0_hw_sm_query_cfg *cfg;
-
- cfg = nvc0_hw_sm_query_get_cfg(nvc0, q);
-
- if (unlikely(!screen->pm.prog)) {
- struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program);
- prog->type = PIPE_SHADER_COMPUTE;
- prog->translated = true;
- prog->num_gprs = 14;
- prog->parm_size = 12;
- if (is_nve4) {
- prog->code = (uint32_t *)nve4_read_hw_sm_counters_code;
- prog->code_size = sizeof(nve4_read_hw_sm_counters_code);
- } else {
- prog->code = (uint32_t *)nvc0_read_hw_sm_counters_code;
- prog->code_size = sizeof(nvc0_read_hw_sm_counters_code);
- }
- screen->pm.prog = prog;
- }
-
- /* disable all counting */
- PUSH_SPACE(push, 8);
- for (c = 0; c < 8; ++c)
- if (screen->pm.mp_counter[c]) {
- if (is_nve4) {
- IMMED_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 0);
- } else {
- IMMED_NVC0(push, NVC0_COMPUTE(MP_PM_OP(c)), 0);
- }
- }
- /* release counters for this query */
- for (c = 0; c < 8; ++c) {
- if (nvc0_query(screen->pm.mp_counter[c]) == q) {
- screen->pm.num_hw_sm_active[c / 4]--;
- screen->pm.mp_counter[c] = NULL;
- }
- }
-
- BCTX_REFN_bo(nvc0->bufctx_cp, CP_QUERY, NOUVEAU_BO_GART | NOUVEAU_BO_WR,
- q->bo);
-
- PUSH_SPACE(push, 1);
- IMMED_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 0);
-
- pipe->bind_compute_state(pipe, screen->pm.prog);
- input[0] = (q->bo->offset + q->base);
- input[1] = (q->bo->offset + q->base) >> 32;
- input[2] = q->sequence;
- pipe->launch_grid(pipe, block, grid, 0, input);
-
- nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_QUERY);
-
- /* re-activate other counters */
- PUSH_SPACE(push, 16);
- mask = 0;
- for (c = 0; c < 8; ++c) {
- unsigned i;
- q = nvc0_query(screen->pm.mp_counter[c]);
- if (!q)
- continue;
- cfg = nvc0_hw_sm_query_get_cfg(nvc0, q);
- for (i = 0; i < cfg->num_counters; ++i) {
- if (mask & (1 << q->ctr[i]))
- break;
- mask |= 1 << q->ctr[i];
- if (is_nve4) {
- BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(q->ctr[i])), 1);
- } else {
- BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(q->ctr[i])), 1);
- }
- PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
- }
- }
-}
-
-static inline bool
-nvc0_hw_sm_query_read_data(uint32_t count[32][4],
- struct nvc0_context *nvc0, bool wait,
- struct nvc0_query *q,
- const struct nvc0_hw_sm_query_cfg *cfg,
- unsigned mp_count)
-{
- unsigned p, c;
-
- for (p = 0; p < mp_count; ++p) {
- const unsigned b = (0x24 / 4) * p;
-
- for (c = 0; c < cfg->num_counters; ++c) {
- if (q->data[b + 8] != q->sequence) {
- if (!wait)
- return false;
- if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->base.client))
- return false;
- }
- count[p][c] = q->data[b + q->ctr[c]];
- }
- }
- return true;
-}
-
-static inline bool
-nve4_hw_sm_query_read_data(uint32_t count[32][4],
- struct nvc0_context *nvc0, bool wait,
- struct nvc0_query *q,
- const struct nvc0_hw_sm_query_cfg *cfg,
- unsigned mp_count)
-{
- unsigned p, c, d;
-
- for (p = 0; p < mp_count; ++p) {
- const unsigned b = (0x60 / 4) * p;
-
- for (c = 0; c < cfg->num_counters; ++c) {
- count[p][c] = 0;
- for (d = 0; d < ((q->ctr[c] & ~3) ? 1 : 4); ++d) {
- if (q->data[b + 20 + d] != q->sequence) {
- if (!wait)
- return false;
- if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->base.client))
- return false;
- }
- if (q->ctr[c] & ~0x3)
- count[p][c] = q->data[b + 16 + (q->ctr[c] & 3)];
- else
- count[p][c] += q->data[b + d * 4 + q->ctr[c]];
- }
- }
- }
- return true;
-}
-
-/* Metric calculations:
- * sum(x) ... sum of x over all MPs
- * avg(x) ... average of x over all MPs
- *
- * IPC : sum(inst_executed) / clock
- * INST_REPLAY_OHEAD: (sum(inst_issued) - sum(inst_executed)) / sum(inst_issued)
- * MP_OCCUPANCY : avg((active_warps / 64) / active_cycles)
- * MP_EFFICIENCY : avg(active_cycles / clock)
- *
- * NOTE: Interpretation of IPC requires knowledge of MP count.
- */
-static boolean
-nvc0_hw_sm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
- void *result, boolean wait)
-{
- uint32_t count[32][4];
- uint64_t value = 0;
- unsigned mp_count = MIN2(nvc0->screen->mp_count_compute, 32);
- unsigned p, c;
- const struct nvc0_hw_sm_query_cfg *cfg;
- bool ret;
-
- cfg = nvc0_hw_sm_query_get_cfg(nvc0, q);
-
- if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
- ret = nve4_hw_sm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
- else
- ret = nvc0_hw_sm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
- if (!ret)
- return false;
-
- if (cfg->op == NVC0_COUNTER_OPn_SUM) {
- for (c = 0; c < cfg->num_counters; ++c)
- for (p = 0; p < mp_count; ++p)
- value += count[p][c];
- value = (value * cfg->norm[0]) / cfg->norm[1];
- } else
- if (cfg->op == NVC0_COUNTER_OPn_OR) {
- uint32_t v = 0;
- for (c = 0; c < cfg->num_counters; ++c)
- for (p = 0; p < mp_count; ++p)
- v |= count[p][c];
- value = ((uint64_t)v * cfg->norm[0]) / cfg->norm[1];
- } else
- if (cfg->op == NVC0_COUNTER_OPn_AND) {
- uint32_t v = ~0;
- for (c = 0; c < cfg->num_counters; ++c)
- for (p = 0; p < mp_count; ++p)
- v &= count[p][c];
- value = ((uint64_t)v * cfg->norm[0]) / cfg->norm[1];
- } else
- if (cfg->op == NVC0_COUNTER_OP2_REL_SUM_MM) {
- uint64_t v[2] = { 0, 0 };
- for (p = 0; p < mp_count; ++p) {
- v[0] += count[p][0];
- v[1] += count[p][1];
- }
- if (v[0])
- value = ((v[0] - v[1]) * cfg->norm[0]) / (v[0] * cfg->norm[1]);
- } else
- if (cfg->op == NVC0_COUNTER_OP2_DIV_SUM_M0) {
- for (p = 0; p < mp_count; ++p)
- value += count[p][0];
- if (count[0][1])
- value = (value * cfg->norm[0]) / (count[0][1] * cfg->norm[1]);
- else
- value = 0;
- } else
- if (cfg->op == NVC0_COUNTER_OP2_AVG_DIV_MM) {
- unsigned mp_used = 0;
- for (p = 0; p < mp_count; ++p, mp_used += !!count[p][0])
- if (count[p][1])
- value += (count[p][0] * cfg->norm[0]) / count[p][1];
- if (mp_used)
- value /= (uint64_t)mp_used * cfg->norm[1];
- } else
- if (cfg->op == NVC0_COUNTER_OP2_AVG_DIV_M0) {
- unsigned mp_used = 0;
- for (p = 0; p < mp_count; ++p, mp_used += !!count[p][0])
- value += count[p][0];
- if (count[0][1] && mp_used) {
- value *= cfg->norm[0];
- value /= (uint64_t)count[0][1] * mp_used * cfg->norm[1];
- } else {
- value = 0;
- }
- }
-
- *(uint64_t *)result = value;
- return true;
+ PUSH_DATAh(push, hq->bo->offset + hq->offset);
+ PUSH_DATA (push, hq->bo->offset + hq->offset);
}
int
@@ -1403,24 +147,13 @@ nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen,
struct pipe_driver_query_info *info)
{
struct nvc0_screen *screen = nvc0_screen(pscreen);
- int count = 0;
+ int num_sw_queries = 0, num_hw_queries = 0;
- count += NVC0_QUERY_DRV_STAT_COUNT;
-
- if (screen->base.device->drm_version >= 0x01000101) {
- if (screen->compute) {
- if (screen->base.class_3d == NVE4_3D_CLASS) {
- count += NVE4_HW_SM_QUERY_COUNT;
- } else
- if (screen->base.class_3d < NVE4_3D_CLASS) {
- /* NVC0_COMPUTE is not always enabled */
- count += NVC0_HW_SM_QUERY_COUNT;
- }
- }
- }
+ num_sw_queries = nvc0_sw_get_driver_query_info(screen, 0, NULL);
+ num_hw_queries = nvc0_hw_get_driver_query_info(screen, 0, NULL);
if (!info)
- return count;
+ return num_sw_queries + num_hw_queries;
/* Init default values. */
info->name = "this_is_not_the_query_you_are_looking_for";
@@ -1430,36 +163,11 @@ nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen,
info->group_id = -1;
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
- if (id < NVC0_QUERY_DRV_STAT_COUNT) {
- info->name = nvc0_drv_stat_names[id];
- info->query_type = NVC0_QUERY_DRV_STAT(id);
- info->max_value.u64 = 0;
- if (strstr(info->name, "bytes"))
- info->type = PIPE_DRIVER_QUERY_TYPE_BYTES;
- info->group_id = NVC0_QUERY_DRV_STAT_GROUP;
- return 1;
- } else
+ if (id < num_sw_queries)
+ return nvc0_sw_get_driver_query_info(screen, id, info);
#endif
- if (id < count) {
- if (screen->compute) {
- if (screen->base.class_3d == NVE4_3D_CLASS) {
- info->name = nve4_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT];
- info->query_type = NVE4_HW_SM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
- info->max_value.u64 =
- (id < NVE4_HW_SM_QUERY_METRIC_MP_OCCUPANCY) ? 0 : 100;
- info->group_id = NVC0_QUERY_MP_COUNTER_GROUP;
- return 1;
- } else
- if (screen->base.class_3d < NVE4_3D_CLASS) {
- info->name = nvc0_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT];
- info->query_type = NVC0_HW_SM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
- info->group_id = NVC0_QUERY_MP_COUNTER_GROUP;
- return 1;
- }
- }
- }
- /* user asked for info about non-existing query */
- return 0;
+
+ return nvc0_hw_get_driver_query_info(screen, id - num_sw_queries, info);
}
int
@@ -1480,7 +188,7 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
count++;
} else
if (screen->base.class_3d < NVE4_3D_CLASS) {
- count++; /* NVC0_COMPUTE is not always enabled */
+ count++;
}
}
}
@@ -1488,37 +196,35 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
if (!info)
return count;
- if (id == NVC0_QUERY_MP_COUNTER_GROUP) {
+ if (id == NVC0_HW_SM_QUERY_GROUP) {
if (screen->compute) {
info->name = "MP counters";
info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
+ /* Because we can't expose the number of hardware counters needed for
+ * each different query, we don't want to allow more than one active
+ * query simultaneously to avoid failure when the maximum number of
+ * counters is reached. Note that these groups of GPU counters are
+ * currently only used by AMD_performance_monitor.
+ */
+ info->max_active_queries = 1;
+
if (screen->base.class_3d == NVE4_3D_CLASS) {
info->num_queries = NVE4_HW_SM_QUERY_COUNT;
-
- /* On NVE4+, each multiprocessor have 8 hardware counters separated
- * in two distinct domains, but we allow only one active query
- * simultaneously because some of them use more than one hardware
- * counter and this will result in an undefined behaviour. */
- info->max_active_queries = 1; /* TODO: handle multiple hw counters */
- return 1;
+ return 1;
} else
if (screen->base.class_3d < NVE4_3D_CLASS) {
info->num_queries = NVC0_HW_SM_QUERY_COUNT;
-
- /* On NVC0:NVE4, each multiprocessor have 8 hardware counters
- * in a single domain. */
- info->max_active_queries = 8;
return 1;
}
}
}
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
- else if (id == NVC0_QUERY_DRV_STAT_GROUP) {
+ else if (id == NVC0_SW_QUERY_DRV_STAT_GROUP) {
info->name = "Driver statistics";
info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_CPU;
- info->max_active_queries = NVC0_QUERY_DRV_STAT_COUNT;
- info->num_queries = NVC0_QUERY_DRV_STAT_COUNT;
+ info->max_active_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
+ info->num_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
return 1;
}
#endif
@@ -1536,10 +242,10 @@ nvc0_init_query_functions(struct nvc0_context *nvc0)
{
struct pipe_context *pipe = &nvc0->base.pipe;
- pipe->create_query = nvc0_query_create;
- pipe->destroy_query = nvc0_query_destroy;
- pipe->begin_query = nvc0_query_begin;
- pipe->end_query = nvc0_query_end;
- pipe->get_query_result = nvc0_query_result;
+ pipe->create_query = nvc0_create_query;
+ pipe->destroy_query = nvc0_destroy_query;
+ pipe->begin_query = nvc0_begin_query;
+ pipe->end_query = nvc0_end_query;
+ pipe->get_query_result = nvc0_get_query_result;
pipe->render_condition = nvc0_render_condition;
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.h b/src/gallium/drivers/nouveau/nvc0/nvc0_query.h
new file mode 100644
index 00000000000..6883ab6ab9d
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.h
@@ -0,0 +1,39 @@
+#ifndef __NVC0_QUERY_H__
+#define __NVC0_QUERY_H__
+
+#include "pipe/p_context.h"
+
+#include "nouveau_context.h"
+
+struct nvc0_context;
+struct nvc0_query;
+
+struct nvc0_query_funcs {
+ void (*destroy_query)(struct nvc0_context *, struct nvc0_query *);
+ boolean (*begin_query)(struct nvc0_context *, struct nvc0_query *);
+ void (*end_query)(struct nvc0_context *, struct nvc0_query *);
+ boolean (*get_query_result)(struct nvc0_context *, struct nvc0_query *,
+ boolean, union pipe_query_result *);
+};
+
+struct nvc0_query {
+ const struct nvc0_query_funcs *funcs;
+ uint16_t type;
+ uint16_t index;
+};
+
+static inline struct nvc0_query *
+nvc0_query(struct pipe_query *pipe)
+{
+ return (struct nvc0_query *)pipe;
+}
+
+/*
+ * Driver queries groups:
+ */
+#define NVC0_HW_SM_QUERY_GROUP 0
+#define NVC0_SW_QUERY_DRV_STAT_GROUP 1
+
+void nvc0_init_query_functions(struct nvc0_context *);
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
new file mode 100644
index 00000000000..90ee82f21e5
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
@@ -0,0 +1,491 @@
+/*
+ * Copyright 2011 Christoph Bumiller
+ * Copyright 2015 Samuel Pitoiset
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#define NVC0_PUSH_EXPLICIT_SPACE_CHECKING
+
+#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_query_hw.h"
+#include "nvc0/nvc0_query_hw_metric.h"
+#include "nvc0/nvc0_query_hw_sm.h"
+
+#define NVC0_HW_QUERY_STATE_READY 0
+#define NVC0_HW_QUERY_STATE_ACTIVE 1
+#define NVC0_HW_QUERY_STATE_ENDED 2
+#define NVC0_HW_QUERY_STATE_FLUSHED 3
+
+#define NVC0_HW_QUERY_ALLOC_SPACE 256
+
+bool
+nvc0_hw_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q,
+ int size)
+{
+ struct nvc0_hw_query *hq = nvc0_hw_query(q);
+ struct nvc0_screen *screen = nvc0->screen;
+ int ret;
+
+ if (hq->bo) {
+ nouveau_bo_ref(NULL, &hq->bo);
+ if (hq->mm) {
+ if (hq->state == NVC0_HW_QUERY_STATE_READY)
+ nouveau_mm_free(hq->mm);
+ else
+ nouveau_fence_work(screen->base.fence.current,
+ nouveau_mm_free_work, hq->mm);
+ }
+ }
+ if (size) {
+ hq->mm = nouveau_mm_allocate(screen->base.mm_GART, size, &hq->bo,
+ &hq->base_offset);
+ if (!hq->bo)
+ return false;
+ hq->offset = hq->base_offset;
+
+ ret = nouveau_bo_map(hq->bo, 0, screen->base.client);
+ if (ret) {
+ nvc0_hw_query_allocate(nvc0, q, 0);
+ return false;
+ }
+ hq->data = (uint32_t *)((uint8_t *)hq->bo->map + hq->base_offset);
+ }
+ return true;
+}
+
+static void
+nvc0_hw_query_get(struct nouveau_pushbuf *push, struct nvc0_query *q,
+ unsigned offset, uint32_t get)
+{
+ struct nvc0_hw_query *hq = nvc0_hw_query(q);
+
+ offset += hq->offset;
+
+ PUSH_SPACE(push, 5);
+ PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
+ BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 4);
+ PUSH_DATAh(push, hq->bo->offset + offset);
+ PUSH_DATA (push, hq->bo->offset + offset);
+ PUSH_DATA (push, hq->sequence);
+ PUSH_DATA (push, get);
+}
+
+static void
+nvc0_hw_query_rotate(struct nvc0_context *nvc0, struct nvc0_query *q)
+{
+ struct nvc0_hw_query *hq = nvc0_hw_query(q);
+
+ hq->offset += hq->rotate;
+ hq->data += hq->rotate / sizeof(*hq->data);
+ if (hq->offset - hq->base_offset == NVC0_HW_QUERY_ALLOC_SPACE)
+ nvc0_hw_query_allocate(nvc0, q, NVC0_HW_QUERY_ALLOC_SPACE);
+}
+
+static inline void
+nvc0_hw_query_update(struct nouveau_client *cli, struct nvc0_query *q)
+{
+ struct nvc0_hw_query *hq = nvc0_hw_query(q);
+
+ if (hq->is64bit) {
+ if (nouveau_fence_signalled(hq->fence))
+ hq->state = NVC0_HW_QUERY_STATE_READY;
+ } else {
+ if (hq->data[0] == hq->sequence)
+ hq->state = NVC0_HW_QUERY_STATE_READY;
+ }
+}
+
+static void
+nvc0_hw_destroy_query(struct nvc0_context *nvc0, struct nvc0_query *q)
+{
+ struct nvc0_hw_query *hq = nvc0_hw_query(q);
+ nvc0_hw_query_allocate(nvc0, q, 0);
+ nouveau_fence_ref(NULL, &hq->fence);
+ FREE(hq);
+}
+
+static boolean
+nvc0_hw_begin_query(struct nvc0_context *nvc0, struct nvc0_query *q)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_hw_query *hq = nvc0_hw_query(q);
+ bool ret = true;
+
+ if (hq->funcs && hq->funcs->begin_query)
+ return hq->funcs->begin_query(nvc0, hq);
+
+ /* For occlusion queries we have to change the storage, because a previous
+ * query might set the initial render condition to false even *after* we re-
+ * initialized it to true.
+ */
+ if (hq->rotate) {
+ nvc0_hw_query_rotate(nvc0, q);
+
+ /* XXX: can we do this with the GPU, and sync with respect to a previous
+ * query ?
+ */
+ hq->data[0] = hq->sequence; /* initialize sequence */
+ hq->data[1] = 1; /* initial render condition = true */
+ hq->data[4] = hq->sequence + 1; /* for comparison COND_MODE */
+ hq->data[5] = 0;
+ }
+ hq->sequence++;
+
+ switch (q->type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ hq->nesting = nvc0->screen->num_occlusion_queries_active++;
+ if (hq->nesting) {
+ nvc0_hw_query_get(push, q, 0x10, 0x0100f002);
+ } else {
+ PUSH_SPACE(push, 3);
+ BEGIN_NVC0(push, NVC0_3D(COUNTER_RESET), 1);
+ PUSH_DATA (push, NVC0_3D_COUNTER_RESET_SAMPLECNT);
+ IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 1);
+ }
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ nvc0_hw_query_get(push, q, 0x10, 0x09005002 | (q->index << 5));
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ nvc0_hw_query_get(push, q, 0x10, 0x05805002 | (q->index << 5));
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ nvc0_hw_query_get(push, q, 0x20, 0x05805002 | (q->index << 5));
+ nvc0_hw_query_get(push, q, 0x30, 0x06805002 | (q->index << 5));
+ break;
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ nvc0_hw_query_get(push, q, 0x10, 0x03005002 | (q->index << 5));
+ break;
+ case PIPE_QUERY_TIME_ELAPSED:
+ nvc0_hw_query_get(push, q, 0x10, 0x00005002);
+ break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ nvc0_hw_query_get(push, q, 0xc0 + 0x00, 0x00801002); /* VFETCH, VERTICES */
+ nvc0_hw_query_get(push, q, 0xc0 + 0x10, 0x01801002); /* VFETCH, PRIMS */
+ nvc0_hw_query_get(push, q, 0xc0 + 0x20, 0x02802002); /* VP, LAUNCHES */
+ nvc0_hw_query_get(push, q, 0xc0 + 0x30, 0x03806002); /* GP, LAUNCHES */
+ nvc0_hw_query_get(push, q, 0xc0 + 0x40, 0x04806002); /* GP, PRIMS_OUT */
+ nvc0_hw_query_get(push, q, 0xc0 + 0x50, 0x07804002); /* RAST, PRIMS_IN */
+ nvc0_hw_query_get(push, q, 0xc0 + 0x60, 0x08804002); /* RAST, PRIMS_OUT */
+ nvc0_hw_query_get(push, q, 0xc0 + 0x70, 0x0980a002); /* ROP, PIXELS */
+ nvc0_hw_query_get(push, q, 0xc0 + 0x80, 0x0d808002); /* TCP, LAUNCHES */
+ nvc0_hw_query_get(push, q, 0xc0 + 0x90, 0x0e809002); /* TEP, LAUNCHES */
+ break;
+ default:
+ break;
+ }
+ hq->state = NVC0_HW_QUERY_STATE_ACTIVE;
+ return ret;
+}
+
+static void
+nvc0_hw_end_query(struct nvc0_context *nvc0, struct nvc0_query *q)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_hw_query *hq = nvc0_hw_query(q);
+
+ if (hq->funcs && hq->funcs->end_query) {
+ hq->funcs->end_query(nvc0, hq);
+ return;
+ }
+
+ if (hq->state != NVC0_HW_QUERY_STATE_ACTIVE) {
+ /* some queries don't require 'begin' to be called (e.g. GPU_FINISHED) */
+ if (hq->rotate)
+ nvc0_hw_query_rotate(nvc0, q);
+ hq->sequence++;
+ }
+ hq->state = NVC0_HW_QUERY_STATE_ENDED;
+
+ switch (q->type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ nvc0_hw_query_get(push, q, 0, 0x0100f002);
+ if (--nvc0->screen->num_occlusion_queries_active == 0) {
+ PUSH_SPACE(push, 1);
+ IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 0);
+ }
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ nvc0_hw_query_get(push, q, 0, 0x09005002 | (q->index << 5));
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ nvc0_hw_query_get(push, q, 0, 0x05805002 | (q->index << 5));
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ nvc0_hw_query_get(push, q, 0x00, 0x05805002 | (q->index << 5));
+ nvc0_hw_query_get(push, q, 0x10, 0x06805002 | (q->index << 5));
+ break;
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ /* TODO: How do we sum over all streams for render condition ? */
+ /* PRIMS_DROPPED doesn't write sequence, use a ZERO query to sync on */
+ nvc0_hw_query_get(push, q, 0x00, 0x03005002 | (q->index << 5));
+ nvc0_hw_query_get(push, q, 0x20, 0x00005002);
+ break;
+ case PIPE_QUERY_TIMESTAMP:
+ case PIPE_QUERY_TIME_ELAPSED:
+ nvc0_hw_query_get(push, q, 0, 0x00005002);
+ break;
+ case PIPE_QUERY_GPU_FINISHED:
+ nvc0_hw_query_get(push, q, 0, 0x1000f010);
+ break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ nvc0_hw_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
+ nvc0_hw_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
+ nvc0_hw_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */
+ nvc0_hw_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */
+ nvc0_hw_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
+ nvc0_hw_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
+ nvc0_hw_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
+ nvc0_hw_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */
+ nvc0_hw_query_get(push, q, 0x80, 0x0d808002); /* TCP, LAUNCHES */
+ nvc0_hw_query_get(push, q, 0x90, 0x0e809002); /* TEP, LAUNCHES */
+ break;
+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
+ /* This query is not issued on GPU because disjoint is forced to false */
+ hq->state = NVC0_HW_QUERY_STATE_READY;
+ break;
+ case NVC0_HW_QUERY_TFB_BUFFER_OFFSET:
+ /* indexed by TFB buffer instead of by vertex stream */
+ nvc0_hw_query_get(push, q, 0x00, 0x0d005002 | (q->index << 5));
+ break;
+ default:
+ break;
+ }
+ if (hq->is64bit)
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &hq->fence);
+}
+
+static boolean
+nvc0_hw_get_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
+ boolean wait, union pipe_query_result *result)
+{
+ struct nvc0_hw_query *hq = nvc0_hw_query(q);
+ uint64_t *res64 = (uint64_t*)result;
+ uint32_t *res32 = (uint32_t*)result;
+ uint8_t *res8 = (uint8_t*)result;
+ uint64_t *data64 = (uint64_t *)hq->data;
+ unsigned i;
+
+ if (hq->funcs && hq->funcs->get_query_result)
+ return hq->funcs->get_query_result(nvc0, hq, wait, result);
+
+ if (hq->state != NVC0_HW_QUERY_STATE_READY)
+ nvc0_hw_query_update(nvc0->screen->base.client, q);
+
+ if (hq->state != NVC0_HW_QUERY_STATE_READY) {
+ if (!wait) {
+ if (hq->state != NVC0_HW_QUERY_STATE_FLUSHED) {
+ hq->state = NVC0_HW_QUERY_STATE_FLUSHED;
+ /* flush for silly apps that spin on GL_QUERY_RESULT_AVAILABLE */
+ PUSH_KICK(nvc0->base.pushbuf);
+ }
+ return false;
+ }
+ if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nvc0->screen->base.client))
+ return false;
+ NOUVEAU_DRV_STAT(&nvc0->screen->base, query_sync_count, 1);
+ }
+ hq->state = NVC0_HW_QUERY_STATE_READY;
+
+ switch (q->type) {
+ case PIPE_QUERY_GPU_FINISHED:
+ res8[0] = true;
+ break;
+ case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
+ res64[0] = hq->data[1] - hq->data[5];
+ break;
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ res8[0] = hq->data[1] != hq->data[5];
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
+ case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
+ res64[0] = data64[0] - data64[2];
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ res64[0] = data64[0] - data64[4];
+ res64[1] = data64[2] - data64[6];
+ break;
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ res8[0] = data64[0] != data64[2];
+ break;
+ case PIPE_QUERY_TIMESTAMP:
+ res64[0] = data64[1];
+ break;
+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
+ res64[0] = 1000000000;
+ res8[8] = false;
+ break;
+ case PIPE_QUERY_TIME_ELAPSED:
+ res64[0] = data64[1] - data64[3];
+ break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ for (i = 0; i < 10; ++i)
+ res64[i] = data64[i * 2] - data64[24 + i * 2];
+ break;
+ case NVC0_HW_QUERY_TFB_BUFFER_OFFSET:
+ res32[0] = hq->data[1];
+ break;
+ default:
+ assert(0); /* can't happen, we don't create queries with invalid type */
+ return false;
+ }
+
+ return true;
+}
+
+static const struct nvc0_query_funcs hw_query_funcs = {
+ .destroy_query = nvc0_hw_destroy_query,
+ .begin_query = nvc0_hw_begin_query,
+ .end_query = nvc0_hw_end_query,
+ .get_query_result = nvc0_hw_get_query_result,
+};
+
+struct nvc0_query *
+nvc0_hw_create_query(struct nvc0_context *nvc0, unsigned type, unsigned index)
+{
+ struct nvc0_hw_query *hq;
+ struct nvc0_query *q;
+ unsigned space = NVC0_HW_QUERY_ALLOC_SPACE;
+
+ hq = nvc0_hw_sm_create_query(nvc0, type);
+ if (hq) {
+ hq->base.funcs = &hw_query_funcs;
+ return (struct nvc0_query *)hq;
+ }
+
+ hq = nvc0_hw_metric_create_query(nvc0, type);
+ if (hq) {
+ hq->base.funcs = &hw_query_funcs;
+ return (struct nvc0_query *)hq;
+ }
+
+ hq = CALLOC_STRUCT(nvc0_hw_query);
+ if (!hq)
+ return NULL;
+
+ q = &hq->base;
+ q->funcs = &hw_query_funcs;
+ q->type = type;
+
+ switch (q->type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ hq->rotate = 32;
+ space = NVC0_HW_QUERY_ALLOC_SPACE;
+ break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ hq->is64bit = true;
+ space = 512;
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ hq->is64bit = true;
+ space = 64;
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ hq->is64bit = true;
+ q->index = index;
+ space = 32;
+ break;
+ case PIPE_QUERY_TIME_ELAPSED:
+ case PIPE_QUERY_TIMESTAMP:
+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
+ case PIPE_QUERY_GPU_FINISHED:
+ space = 32;
+ break;
+ case NVC0_HW_QUERY_TFB_BUFFER_OFFSET:
+ space = 16;
+ break;
+ default:
+ debug_printf("invalid query type: %u\n", type);
+ FREE(q);
+ return NULL;
+ }
+
+ if (!nvc0_hw_query_allocate(nvc0, q, space)) {
+ FREE(hq);
+ return NULL;
+ }
+
+ if (hq->rotate) {
+ /* we advance before query_begin ! */
+ hq->offset -= hq->rotate;
+ hq->data -= hq->rotate / sizeof(*hq->data);
+ } else
+ if (!hq->is64bit)
+ hq->data[0] = 0; /* initialize sequence */
+
+ return q;
+}
+
+int
+nvc0_hw_get_driver_query_info(struct nvc0_screen *screen, unsigned id,
+ struct pipe_driver_query_info *info)
+{
+ int num_hw_sm_queries = 0, num_hw_metric_queries = 0;
+
+ num_hw_sm_queries = nvc0_hw_sm_get_driver_query_info(screen, 0, NULL);
+ num_hw_metric_queries =
+ nvc0_hw_metric_get_driver_query_info(screen, 0, NULL);
+
+ if (!info)
+ return num_hw_sm_queries + num_hw_metric_queries;
+
+ if (id < num_hw_sm_queries)
+ return nvc0_hw_sm_get_driver_query_info(screen, id, info);
+
+ return nvc0_hw_metric_get_driver_query_info(screen,
+ id - num_hw_sm_queries, info);
+}
+
+void
+nvc0_hw_query_pushbuf_submit(struct nouveau_pushbuf *push,
+ struct nvc0_query *q, unsigned result_offset)
+{
+ struct nvc0_hw_query *hq = nvc0_hw_query(q);
+
+#define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8))
+
+ PUSH_REFN(push, hq->bo, NOUVEAU_BO_RD | NOUVEAU_BO_GART);
+ nouveau_pushbuf_space(push, 0, 0, 1);
+ nouveau_pushbuf_data(push, hq->bo, hq->offset + result_offset, 4 |
+ NVC0_IB_ENTRY_1_NO_PREFETCH);
+}
+
+void
+nvc0_hw_query_fifo_wait(struct nouveau_pushbuf *push, struct nvc0_query *q)
+{
+ struct nvc0_hw_query *hq = nvc0_hw_query(q);
+ unsigned offset = hq->offset;
+
+ if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) offset += 0x20;
+
+ PUSH_SPACE(push, 5);
+ PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ BEGIN_NVC0(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
+ PUSH_DATAh(push, hq->bo->offset + offset);
+ PUSH_DATA (push, hq->bo->offset + offset);
+ PUSH_DATA (push, hq->sequence);
+ PUSH_DATA (push, (1 << 12) |
+ NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h
new file mode 100644
index 00000000000..3701eb7100f
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h
@@ -0,0 +1,56 @@
+#ifndef __NVC0_QUERY_HW_H__
+#define __NVC0_QUERY_HW_H__
+
+#include "nouveau_fence.h"
+#include "nouveau_mm.h"
+
+#include "nvc0_query.h"
+
+#define NVC0_HW_QUERY_TFB_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0)
+
+struct nvc0_hw_query;
+
+struct nvc0_hw_query_funcs {
+ void (*destroy_query)(struct nvc0_context *, struct nvc0_hw_query *);
+ boolean (*begin_query)(struct nvc0_context *, struct nvc0_hw_query *);
+ void (*end_query)(struct nvc0_context *, struct nvc0_hw_query *);
+ boolean (*get_query_result)(struct nvc0_context *, struct nvc0_hw_query *,
+ boolean, union pipe_query_result *);
+};
+
+struct nvc0_hw_query {
+ struct nvc0_query base;
+ const struct nvc0_hw_query_funcs *funcs;
+ uint32_t *data;
+ uint32_t sequence;
+ struct nouveau_bo *bo;
+ uint32_t base_offset;
+ uint32_t offset; /* base_offset + i * rotate */
+ uint8_t state;
+ boolean is64bit;
+ uint8_t rotate;
+ int nesting; /* only used for occlusion queries */
+ struct nouveau_mm_allocation *mm;
+ struct nouveau_fence *fence;
+};
+
+static inline struct nvc0_hw_query *
+nvc0_hw_query(struct nvc0_query *q)
+{
+ return (struct nvc0_hw_query *)q;
+}
+
+struct nvc0_query *
+nvc0_hw_create_query(struct nvc0_context *, unsigned, unsigned);
+int
+nvc0_hw_get_driver_query_info(struct nvc0_screen *, unsigned,
+ struct pipe_driver_query_info *);
+bool
+nvc0_hw_query_allocate(struct nvc0_context *, struct nvc0_query *, int);
+void
+nvc0_hw_query_pushbuf_submit(struct nouveau_pushbuf *, struct nvc0_query *,
+ unsigned);
+void
+nvc0_hw_query_fifo_wait(struct nouveau_pushbuf *, struct nvc0_query *);
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
new file mode 100644
index 00000000000..25aa09be42a
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
@@ -0,0 +1,440 @@
+/*
+ * Copyright 2015 Samuel Pitoiset
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_query_hw_metric.h"
+#include "nvc0/nvc0_query_hw_sm.h"
+
+/* === PERFORMANCE MONITORING METRICS for NVC0:NVE4 === */
+static const char *nvc0_hw_metric_names[] =
+{
+ "metric-achieved_occupancy",
+ "metric-branch_efficiency",
+ "metric-inst_issued",
+ "metric-inst_per_wrap",
+ "metric-inst_replay_overhead",
+ "metric-issued_ipc",
+ "metric-issue_slots",
+ "metric-issue_slot_utilization",
+ "metric-ipc",
+};
+
+struct nvc0_hw_metric_query_cfg {
+ uint32_t queries[8];
+ uint32_t num_queries;
+};
+
+#define _SM(n) NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_ ##n)
+#define _M(n, c) [NVC0_HW_METRIC_QUERY_##n] = c
+
+/* ==== Compute capability 2.0 (GF100/GF110) ==== */
+static const struct nvc0_hw_metric_query_cfg
+sm20_achieved_occupancy =
+{
+ .queries[0] = _SM(ACTIVE_WARPS),
+ .queries[1] = _SM(ACTIVE_CYCLES),
+ .num_queries = 2,
+};
+
+static const struct nvc0_hw_metric_query_cfg
+sm20_branch_efficiency =
+{
+ .queries[0] = _SM(BRANCH),
+ .queries[1] = _SM(DIVERGENT_BRANCH),
+ .num_queries = 2,
+};
+
+static const struct nvc0_hw_metric_query_cfg
+sm20_inst_per_wrap =
+{
+ .queries[0] = _SM(INST_EXECUTED),
+ .queries[1] = _SM(WARPS_LAUNCHED),
+ .num_queries = 2,
+};
+
+static const struct nvc0_hw_metric_query_cfg
+sm20_inst_replay_overhead =
+{
+ .queries[0] = _SM(INST_ISSUED),
+ .queries[1] = _SM(INST_EXECUTED),
+ .num_queries = 2,
+};
+
+static const struct nvc0_hw_metric_query_cfg
+sm20_issued_ipc =
+{
+ .queries[0] = _SM(INST_ISSUED),
+ .queries[1] = _SM(ACTIVE_CYCLES),
+ .num_queries = 2,
+};
+
+static const struct nvc0_hw_metric_query_cfg
+sm20_ipc =
+{
+ .queries[0] = _SM(INST_EXECUTED),
+ .queries[1] = _SM(ACTIVE_CYCLES),
+ .num_queries = 2,
+};
+
+static const struct nvc0_hw_metric_query_cfg *sm20_hw_metric_queries[] =
+{
+ _M(ACHIEVED_OCCUPANCY, &sm20_achieved_occupancy),
+ _M(BRANCH_EFFICIENCY, &sm20_branch_efficiency),
+ _M(INST_ISSUED, NULL),
+ _M(INST_PER_WRAP, &sm20_inst_per_wrap),
+ _M(INST_REPLAY_OVERHEAD, &sm20_inst_replay_overhead),
+ _M(ISSUED_IPC, &sm20_issued_ipc),
+ _M(ISSUE_SLOTS, NULL),
+ _M(ISSUE_SLOT_UTILIZATION, &sm20_issued_ipc),
+ _M(IPC, &sm20_ipc),
+};
+
+/* ==== Compute capability 2.1 (GF108+ except GF110) ==== */
+static const struct nvc0_hw_metric_query_cfg
+sm21_inst_issued =
+{
+ .queries[0] = _SM(INST_ISSUED1_0),
+ .queries[1] = _SM(INST_ISSUED1_1),
+ .queries[2] = _SM(INST_ISSUED2_0),
+ .queries[3] = _SM(INST_ISSUED2_1),
+ .num_queries = 4,
+};
+
+static const struct nvc0_hw_metric_query_cfg
+sm21_inst_replay_overhead =
+{
+ .queries[0] = _SM(INST_ISSUED1_0),
+ .queries[1] = _SM(INST_ISSUED1_1),
+ .queries[2] = _SM(INST_ISSUED2_0),
+ .queries[3] = _SM(INST_ISSUED2_1),
+ .queries[4] = _SM(INST_EXECUTED),
+ .num_queries = 5,
+};
+
+static const struct nvc0_hw_metric_query_cfg
+sm21_issued_ipc =
+{
+ .queries[0] = _SM(INST_ISSUED1_0),
+ .queries[1] = _SM(INST_ISSUED1_1),
+ .queries[2] = _SM(INST_ISSUED2_0),
+ .queries[3] = _SM(INST_ISSUED2_1),
+ .queries[4] = _SM(ACTIVE_CYCLES),
+ .num_queries = 5,
+};
+
+static const struct nvc0_hw_metric_query_cfg *sm21_hw_metric_queries[] =
+{
+ _M(ACHIEVED_OCCUPANCY, &sm20_achieved_occupancy),
+ _M(BRANCH_EFFICIENCY, &sm20_branch_efficiency),
+ _M(INST_ISSUED, &sm21_inst_issued),
+ _M(INST_PER_WRAP, &sm20_inst_per_wrap),
+ _M(INST_REPLAY_OVERHEAD, &sm21_inst_replay_overhead),
+ _M(ISSUED_IPC, &sm21_issued_ipc),
+ _M(ISSUE_SLOTS, &sm21_inst_issued),
+ _M(ISSUE_SLOT_UTILIZATION, &sm21_issued_ipc),
+ _M(IPC, &sm20_ipc),
+};
+
+#undef _SM
+#undef _M
+
+static inline const struct nvc0_hw_metric_query_cfg **
+nvc0_hw_metric_get_queries(struct nvc0_screen *screen)
+{
+ struct nouveau_device *dev = screen->base.device;
+
+ if (dev->chipset == 0xc0 || dev->chipset == 0xc8)
+ return sm20_hw_metric_queries;
+ return sm21_hw_metric_queries;
+}
+
+static const struct nvc0_hw_metric_query_cfg *
+nvc0_hw_metric_query_get_cfg(struct nvc0_context *nvc0,
+ struct nvc0_hw_query *hq)
+{
+ const struct nvc0_hw_metric_query_cfg **queries;
+ struct nvc0_screen *screen = nvc0->screen;
+ struct nvc0_query *q = &hq->base;
+
+ queries = nvc0_hw_metric_get_queries(screen);
+ return queries[q->type - NVC0_HW_METRIC_QUERY(0)];
+}
+
+static void
+nvc0_hw_metric_destroy_query(struct nvc0_context *nvc0,
+ struct nvc0_hw_query *hq)
+{
+ struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
+ unsigned i;
+
+ for (i = 0; i < hmq->num_queries; i++)
+ hmq->queries[i]->funcs->destroy_query(nvc0, hmq->queries[i]);
+ FREE(hmq);
+}
+
+static boolean
+nvc0_hw_metric_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
+{
+ struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
+ boolean ret = false;
+ unsigned i;
+
+ for (i = 0; i < hmq->num_queries; i++) {
+ ret = hmq->queries[i]->funcs->begin_query(nvc0, hmq->queries[i]);
+ if (!ret)
+ return ret;
+ }
+ return ret;
+}
+
+static void
+nvc0_hw_metric_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
+{
+ struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
+ unsigned i;
+
+ for (i = 0; i < hmq->num_queries; i++)
+ hmq->queries[i]->funcs->end_query(nvc0, hmq->queries[i]);
+}
+
+static uint64_t
+sm20_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
+{
+ switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
+ case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
+ /* (active_warps / active_cycles) / max. number of warps on a MP */
+ if (res64[1])
+ return (res64[0] / (double)res64[1]) / 48;
+ break;
+ case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
+ /* (branch / (branch + divergent_branch)) * 100 */
+ if (res64[0] + res64[1])
+ return (res64[0] / (double)(res64[0] + res64[1])) * 100;
+ break;
+ case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:
+ /* inst_executed / warps_launched */
+ if (res64[1])
+ return res64[0] / (double)res64[1];
+ break;
+ case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
+ /* (inst_issued - inst_executed) / inst_executed */
+ if (res64[1])
+ return (res64[0] - res64[1]) / (double)res64[1];
+ break;
+ case NVC0_HW_METRIC_QUERY_ISSUED_IPC:
+ /* inst_issued / active_cycles */
+ if (res64[1])
+ return res64[0] / (double)res64[1];
+ break;
+ case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
+ /* ((inst_issued / 2) / active_cycles) * 100 */
+ if (res64[1])
+ return ((res64[0] / 2) / (double)res64[1]) * 100;
+ break;
+ case NVC0_HW_METRIC_QUERY_IPC:
+ /* inst_executed / active_cycles */
+ if (res64[1])
+ return res64[0] / (double)res64[1];
+ break;
+ default:
+ debug_printf("invalid metric type: %d\n",
+ hq->base.type - NVC0_HW_METRIC_QUERY(0));
+ break;
+ }
+ return 0;
+}
+
+static uint64_t
+sm21_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
+{
+ switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
+ case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
+ return sm20_hw_metric_calc_result(hq, res64);
+ case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
+ return sm20_hw_metric_calc_result(hq, res64);
+ case NVC0_HW_METRIC_QUERY_INST_ISSUED:
+ /* issued1_0 + issued1_1 + (issued2_0 + issued2_1) * 2 */
+ return res64[0] + res64[1] + (res64[2] + res64[3]) * 2;
+ break;
+ case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:
+ return sm20_hw_metric_calc_result(hq, res64);
+ case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
+ /* (metric-inst_issued - inst_executed) / inst_executed */
+ if (res64[4])
+ return (((res64[0] + res64[1] + (res64[2] + res64[3]) * 2) -
+ res64[4]) / (double)res64[4]);
+ break;
+ case NVC0_HW_METRIC_QUERY_ISSUED_IPC:
+ /* metric-inst_issued / active_cycles */
+ if (res64[4])
+ return (res64[0] + res64[1] + (res64[2] + res64[3]) * 2) /
+ (double)res64[4];
+ break;
+ case NVC0_HW_METRIC_QUERY_ISSUE_SLOTS:
+ /* issued1_0 + issued1_1 + issued2_0 + issued2_1 */
+ return res64[0] + res64[1] + res64[2] + res64[3];
+ break;
+ case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
+ /* ((metric-issue_slots / 2) / active_cycles) * 100 */
+ if (res64[4])
+ return (((res64[0] + res64[1] + res64[2] + res64[3]) / 2) /
+ (double)res64[4]) * 100;
+ break;
+ case NVC0_HW_METRIC_QUERY_IPC:
+ return sm20_hw_metric_calc_result(hq, res64);
+ default:
+ debug_printf("invalid metric type: %d\n",
+ hq->base.type - NVC0_HW_METRIC_QUERY(0));
+ break;
+ }
+ return 0;
+}
+
+static boolean
+nvc0_hw_metric_get_query_result(struct nvc0_context *nvc0,
+ struct nvc0_hw_query *hq, boolean wait,
+ union pipe_query_result *result)
+{
+ struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
+ struct nvc0_screen *screen = nvc0->screen;
+ struct nouveau_device *dev = screen->base.device;
+ union pipe_query_result results[8] = {};
+ uint64_t res64[8] = {};
+ uint64_t value = 0;
+ boolean ret = false;
+ unsigned i;
+
+ for (i = 0; i < hmq->num_queries; i++) {
+ ret = hmq->queries[i]->funcs->get_query_result(nvc0, hmq->queries[i],
+ wait, &results[i]);
+ if (!ret)
+ return ret;
+ res64[i] = *(uint64_t *)&results[i];
+ }
+
+ if (dev->chipset == 0xc0 || dev->chipset == 0xc8)
+ value = sm20_hw_metric_calc_result(hq, res64);
+ else
+ value = sm21_hw_metric_calc_result(hq, res64);
+
+ *(uint64_t *)result = value;
+ return ret;
+}
+
+static const struct nvc0_hw_query_funcs hw_metric_query_funcs = {
+ .destroy_query = nvc0_hw_metric_destroy_query,
+ .begin_query = nvc0_hw_metric_begin_query,
+ .end_query = nvc0_hw_metric_end_query,
+ .get_query_result = nvc0_hw_metric_get_query_result,
+};
+
+struct nvc0_hw_query *
+nvc0_hw_metric_create_query(struct nvc0_context *nvc0, unsigned type)
+{
+ const struct nvc0_hw_metric_query_cfg *cfg;
+ struct nvc0_hw_metric_query *hmq;
+ struct nvc0_hw_query *hq;
+ unsigned i;
+
+ if (type < NVC0_HW_METRIC_QUERY(0) || type > NVC0_HW_METRIC_QUERY_LAST)
+ return NULL;
+
+ hmq = CALLOC_STRUCT(nvc0_hw_metric_query);
+ if (!hmq)
+ return NULL;
+
+ hq = &hmq->base;
+ hq->funcs = &hw_metric_query_funcs;
+ hq->base.type = type;
+
+ cfg = nvc0_hw_metric_query_get_cfg(nvc0, hq);
+
+ for (i = 0; i < cfg->num_queries; i++) {
+ hmq->queries[i] = nvc0_hw_sm_create_query(nvc0, cfg->queries[i]);
+ if (!hmq->queries[i]) {
+ nvc0_hw_metric_destroy_query(nvc0, hq);
+ return NULL;
+ }
+ hmq->num_queries++;
+ }
+
+ return hq;
+}
+
+static int
+nvc0_hw_metric_get_next_query_id(const struct nvc0_hw_metric_query_cfg **queries,
+ unsigned id)
+{
+ unsigned i, next = 0;
+
+ for (i = 0; i < NVC0_HW_METRIC_QUERY_COUNT; i++) {
+ if (!queries[i]) {
+ next++;
+ } else
+ if (i >= id && queries[id + next]) {
+ break;
+ }
+ }
+ return id + next;
+}
+
+int
+nvc0_hw_metric_get_driver_query_info(struct nvc0_screen *screen, unsigned id,
+ struct pipe_driver_query_info *info)
+{
+ uint16_t class_3d = screen->base.class_3d;
+ int count = 0;
+
+ if (screen->base.device->drm_version >= 0x01000101) {
+ if (screen->compute) {
+ if (class_3d < NVE4_3D_CLASS) {
+ const struct nvc0_hw_metric_query_cfg **queries =
+ nvc0_hw_metric_get_queries(screen);
+ unsigned i;
+
+ for (i = 0; i < NVC0_HW_METRIC_QUERY_COUNT; i++) {
+ if (queries[i])
+ count++;
+ }
+ }
+ }
+ }
+
+ if (!info)
+ return count;
+
+ if (id < count) {
+ if (screen->compute) {
+ if (class_3d < NVE4_3D_CLASS) {
+ const struct nvc0_hw_metric_query_cfg **queries =
+ nvc0_hw_metric_get_queries(screen);
+
+ id = nvc0_hw_metric_get_next_query_id(queries, id);
+ info->name = nvc0_hw_metric_names[id];
+ info->query_type = NVC0_HW_METRIC_QUERY(id);
+ info->group_id = -1;
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h
new file mode 100644
index 00000000000..95675fd19b7
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h
@@ -0,0 +1,42 @@
+#ifndef __NVC0_QUERY_HW_METRIC_H__
+#define __NVC0_QUERY_HW_METRIC_H__
+
+#include "nvc0_query_hw.h"
+
+struct nvc0_hw_metric_query {
+ struct nvc0_hw_query base;
+ struct nvc0_hw_query *queries[8];
+ unsigned num_queries;
+};
+
+static inline struct nvc0_hw_metric_query *
+nvc0_hw_metric_query(struct nvc0_hw_query *hq)
+{
+ return (struct nvc0_hw_metric_query *)hq;
+}
+
+/*
+ * Driver metrics queries:
+ */
+#define NVC0_HW_METRIC_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + 3072 + (i))
+#define NVC0_HW_METRIC_QUERY_LAST NVC0_HW_METRIC_QUERY(NVC0_HW_METRIC_QUERY_COUNT - 1)
+enum nvc0_hw_metric_queries
+{
+ NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY = 0,
+ NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY,
+ NVC0_HW_METRIC_QUERY_INST_ISSUED,
+ NVC0_HW_METRIC_QUERY_INST_PER_WRAP,
+ NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,
+ NVC0_HW_METRIC_QUERY_ISSUED_IPC,
+ NVC0_HW_METRIC_QUERY_ISSUE_SLOTS,
+ NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,
+ NVC0_HW_METRIC_QUERY_IPC,
+ NVC0_HW_METRIC_QUERY_COUNT
+};
+
+struct nvc0_hw_query *
+nvc0_hw_metric_create_query(struct nvc0_context *, unsigned);
+int
+nvc0_hw_metric_get_driver_query_info(struct nvc0_screen *, unsigned,
+ struct pipe_driver_query_info *);
+#endif
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
new file mode 100644
index 00000000000..44b222e5134
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
@@ -0,0 +1,1387 @@
+/*
+ * Copyright 2011 Christoph Bumiller
+ * Copyright 2015 Samuel Pitoiset
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#define NVC0_PUSH_EXPLICIT_SPACE_CHECKING
+
+#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_query_hw_sm.h"
+
+#include "nv_object.xml.h"
+#include "nvc0/nve4_compute.xml.h"
+#include "nvc0/nvc0_compute.xml.h"
+
+/* === PERFORMANCE MONITORING COUNTERS for NVE4+ === */
+
+/* NOTE: intentionally using the same names as NV */
+static const char *nve4_hw_sm_query_names[] =
+{
+ /* MP counters */
+ "active_cycles",
+ "active_warps",
+ "atom_count",
+ "branch",
+ "divergent_branch",
+ "gld_request",
+ "global_ld_mem_divergence_replays",
+ "global_store_transaction",
+ "global_st_mem_divergence_replays",
+ "gred_count",
+ "gst_request",
+ "inst_executed",
+ "inst_issued",
+ "inst_issued1",
+ "inst_issued2",
+ "l1_global_load_hit",
+ "l1_global_load_miss",
+ "l1_local_load_hit",
+ "l1_local_load_miss",
+ "l1_local_store_hit",
+ "l1_local_store_miss",
+ "l1_shared_load_transactions",
+ "l1_shared_store_transactions",
+ "local_load",
+ "local_load_transactions",
+ "local_store",
+ "local_store_transactions",
+ "prof_trigger_00",
+ "prof_trigger_01",
+ "prof_trigger_02",
+ "prof_trigger_03",
+ "prof_trigger_04",
+ "prof_trigger_05",
+ "prof_trigger_06",
+ "prof_trigger_07",
+ "shared_load",
+ "shared_load_replay",
+ "shared_store",
+ "shared_store_replay",
+ "sm_cta_launched",
+ "threads_launched",
+ "uncached_global_load_transaction",
+ "warps_launched",
+ /* metrics, i.e. functions of the MP counters */
+ "metric-ipc", /* inst_executed, clock */
+ "metric-ipac", /* inst_executed, active_cycles */
+ "metric-ipec", /* inst_executed, (bool)inst_executed */
+ "metric-achieved_occupancy", /* active_warps, active_cycles */
+ "metric-sm_efficiency", /* active_cycles, clock */
+ "metric-inst_replay_overhead" /* inst_issued, inst_executed */
+};
+
+/* Code to read out MP counters: They are accessible via mmio, too, but let's
+ * just avoid mapping registers in userspace. We'd have to know which MPs are
+ * enabled/present, too, and that information is not presently exposed.
+ * We could add a kernel interface for it, but reading the counters like this
+ * has the advantage of being async (if get_result isn't called immediately).
+ */
+static const uint64_t nve4_read_hw_sm_counters_code[] =
+{
+ /* sched 0x20 0x20 0x20 0x20 0x20 0x20 0x20
+ * mov b32 $r8 $tidx
+ * mov b32 $r12 $physid
+ * mov b32 $r0 $pm0
+ * mov b32 $r1 $pm1
+ * mov b32 $r2 $pm2
+ * mov b32 $r3 $pm3
+ * mov b32 $r4 $pm4
+ * sched 0x20 0x20 0x23 0x04 0x20 0x04 0x2b
+ * mov b32 $r5 $pm5
+ * mov b32 $r6 $pm6
+ * mov b32 $r7 $pm7
+ * set $p0 0x1 eq u32 $r8 0x0
+ * mov b32 $r10 c0[0x0]
+ * ext u32 $r8 $r12 0x414
+ * mov b32 $r11 c0[0x4]
+ * sched 0x04 0x2e 0x04 0x20 0x20 0x28 0x04
+ * ext u32 $r9 $r12 0x208
+ * (not $p0) exit
+ * set $p1 0x1 eq u32 $r9 0x0
+ * mul $r8 u32 $r8 u32 96
+ * mul $r12 u32 $r9 u32 16
+ * mul $r13 u32 $r9 u32 4
+ * add b32 $r9 $r8 $r13
+ * sched 0x28 0x04 0x2c 0x04 0x2c 0x04 0x2c
+ * add b32 $r8 $r8 $r12
+ * mov b32 $r12 $r10
+ * add b32 $r10 $c $r10 $r8
+ * mov b32 $r13 $r11
+ * add b32 $r11 $r11 0x0 $c
+ * add b32 $r12 $c $r12 $r9
+ * st b128 wt g[$r10d] $r0q
+ * sched 0x4 0x2c 0x20 0x04 0x2e 0x00 0x00
+ * mov b32 $r0 c0[0x8]
+ * add b32 $r13 $r13 0x0 $c
+ * $p1 st b128 wt g[$r12d+0x40] $r4q
+ * st b32 wt g[$r12d+0x50] $r0
+ * exit */
+ 0x2202020202020207ULL,
+ 0x2c00000084021c04ULL,
+ 0x2c0000000c031c04ULL,
+ 0x2c00000010001c04ULL,
+ 0x2c00000014005c04ULL,
+ 0x2c00000018009c04ULL,
+ 0x2c0000001c00dc04ULL,
+ 0x2c00000020011c04ULL,
+ 0x22b0420042320207ULL,
+ 0x2c00000024015c04ULL,
+ 0x2c00000028019c04ULL,
+ 0x2c0000002c01dc04ULL,
+ 0x190e0000fc81dc03ULL,
+ 0x2800400000029de4ULL,
+ 0x7000c01050c21c03ULL,
+ 0x280040001002dde4ULL,
+ 0x204282020042e047ULL,
+ 0x7000c00820c25c03ULL,
+ 0x80000000000021e7ULL,
+ 0x190e0000fc93dc03ULL,
+ 0x1000000180821c02ULL,
+ 0x1000000040931c02ULL,
+ 0x1000000010935c02ULL,
+ 0x4800000034825c03ULL,
+ 0x22c042c042c04287ULL,
+ 0x4800000030821c03ULL,
+ 0x2800000028031de4ULL,
+ 0x4801000020a29c03ULL,
+ 0x280000002c035de4ULL,
+ 0x0800000000b2dc42ULL,
+ 0x4801000024c31c03ULL,
+ 0x9400000000a01fc5ULL,
+ 0x200002e04202c047ULL,
+ 0x2800400020001de4ULL,
+ 0x0800000000d35c42ULL,
+ 0x9400000100c107c5ULL,
+ 0x9400000140c01f85ULL,
+ 0x8000000000001de7ULL
+};
+
+/* For simplicity, we will allocate as many group slots as we allocate counter
+ * slots. This means that a single counter which wants to source from 2 groups
+ * will have to be declared as using 2 counter slots. This shouldn't really be
+ * a problem because such queries don't make much sense ... (unless someone is
+ * really creative).
+ */
+struct nvc0_hw_sm_counter_cfg
+{
+ uint32_t func : 16; /* mask or 4-bit logic op (depending on mode) */
+ uint32_t mode : 4; /* LOGOP,B6,LOGOP_B6(_PULSE) */
+ uint32_t sig_dom : 1; /* if 0, MP_PM_A (per warp-sched), if 1, MP_PM_B */
+ uint32_t sig_sel : 8; /* signal group */
+ uint32_t src_mask; /* mask for signal selection (only for NVC0:NVE4) */
+ uint32_t src_sel; /* signal selection for up to 4 sources */
+};
+
+#define NVC0_COUNTER_OPn_SUM 0
+#define NVC0_COUNTER_OPn_OR 1
+#define NVC0_COUNTER_OPn_AND 2
+#define NVC0_COUNTER_OP2_REL_SUM_MM 3 /* (sum(ctr0) - sum(ctr1)) / sum(ctr0) */
+#define NVC0_COUNTER_OP2_DIV_SUM_M0 4 /* sum(ctr0) / ctr1 of MP[0]) */
+#define NVC0_COUNTER_OP2_AVG_DIV_MM 5 /* avg(ctr0 / ctr1) */
+#define NVC0_COUNTER_OP2_AVG_DIV_M0 6 /* avg(ctr0) / ctr1 of MP[0]) */
+
+struct nvc0_hw_sm_query_cfg
+{
+ struct nvc0_hw_sm_counter_cfg ctr[8];
+ uint8_t num_counters;
+ uint8_t op;
+ uint8_t norm[2]; /* normalization num,denom */
+};
+
+#define _Q1A(n, f, m, g, s, nu, dn) [NVE4_HW_SM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, 0, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
+#define _Q1B(n, f, m, g, s, nu, dn) [NVE4_HW_SM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, 0, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
+#define _M2A(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_HW_SM_QUERY_METRIC_##n] = { { \
+ { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, 0, s0 }, \
+ { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g1, 0, s1 }, \
+ {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
+#define _M2B(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_HW_SM_QUERY_METRIC_##n] = { { \
+ { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g0, 0, s0 }, \
+ { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, 0, s1 }, \
+ {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
+#define _M2AB(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_HW_SM_QUERY_METRIC_##n] = { { \
+ { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, 0, s0 }, \
+ { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, 0, s1 }, \
+ {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
+
+/* NOTES:
+ * active_warps: bit 0 alternates btw 0 and 1 for odd nr of warps
+ * inst_executed etc.: we only count a single warp scheduler
+ * metric-ipXc: we simply multiply by 4 to account for the 4 warp schedulers;
+ * this is inaccurate !
+ */
+static const struct nvc0_hw_sm_query_cfg nve4_hw_sm_queries[] =
+{
+ _Q1B(ACTIVE_CYCLES, 0x0001, B6, WARP, 0x00000000, 1, 1),
+ _Q1B(ACTIVE_WARPS, 0x003f, B6, WARP, 0x31483104, 2, 1),
+ _Q1A(ATOM_COUNT, 0x0001, B6, BRANCH, 0x00000000, 1, 1),
+ _Q1A(BRANCH, 0x0001, B6, BRANCH, 0x0000000c, 1, 1),
+ _Q1A(DIVERGENT_BRANCH, 0x0001, B6, BRANCH, 0x00000010, 1, 1),
+ _Q1A(GLD_REQUEST, 0x0001, B6, LDST, 0x00000010, 1, 1),
+ _Q1B(GLD_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000010, 1, 1),
+ _Q1B(GST_TRANSACTIONS, 0x0001, B6, MEM, 0x00000004, 1, 1),
+ _Q1B(GST_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000014, 1, 1),
+ _Q1A(GRED_COUNT, 0x0001, B6, BRANCH, 0x00000008, 1, 1),
+ _Q1A(GST_REQUEST, 0x0001, B6, LDST, 0x00000014, 1, 1),
+ _Q1A(INST_EXECUTED, 0x0003, B6, EXEC, 0x00000398, 1, 1),
+ _Q1A(INST_ISSUED, 0x0003, B6, ISSUE, 0x00000104, 1, 1),
+ _Q1A(INST_ISSUED1, 0x0001, B6, ISSUE, 0x00000004, 1, 1),
+ _Q1A(INST_ISSUED2, 0x0001, B6, ISSUE, 0x00000008, 1, 1),
+ _Q1B(L1_GLD_HIT, 0x0001, B6, L1, 0x00000010, 1, 1),
+ _Q1B(L1_GLD_MISS, 0x0001, B6, L1, 0x00000014, 1, 1),
+ _Q1B(L1_LOCAL_LD_HIT, 0x0001, B6, L1, 0x00000000, 1, 1),
+ _Q1B(L1_LOCAL_LD_MISS, 0x0001, B6, L1, 0x00000004, 1, 1),
+ _Q1B(L1_LOCAL_ST_HIT, 0x0001, B6, L1, 0x00000008, 1, 1),
+ _Q1B(L1_LOCAL_ST_MISS, 0x0001, B6, L1, 0x0000000c, 1, 1),
+ _Q1B(L1_SHARED_LD_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000008, 1, 1),
+ _Q1B(L1_SHARED_ST_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x0000000c, 1, 1),
+ _Q1A(LOCAL_LD, 0x0001, B6, LDST, 0x00000008, 1, 1),
+ _Q1B(LOCAL_LD_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000000, 1, 1),
+ _Q1A(LOCAL_ST, 0x0001, B6, LDST, 0x0000000c, 1, 1),
+ _Q1B(LOCAL_ST_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000004, 1, 1),
+ _Q1A(PROF_TRIGGER_0, 0x0001, B6, USER, 0x00000000, 1, 1),
+ _Q1A(PROF_TRIGGER_1, 0x0001, B6, USER, 0x00000004, 1, 1),
+ _Q1A(PROF_TRIGGER_2, 0x0001, B6, USER, 0x00000008, 1, 1),
+ _Q1A(PROF_TRIGGER_3, 0x0001, B6, USER, 0x0000000c, 1, 1),
+ _Q1A(PROF_TRIGGER_4, 0x0001, B6, USER, 0x00000010, 1, 1),
+ _Q1A(PROF_TRIGGER_5, 0x0001, B6, USER, 0x00000014, 1, 1),
+ _Q1A(PROF_TRIGGER_6, 0x0001, B6, USER, 0x00000018, 1, 1),
+ _Q1A(PROF_TRIGGER_7, 0x0001, B6, USER, 0x0000001c, 1, 1),
+ _Q1A(SHARED_LD, 0x0001, B6, LDST, 0x00000000, 1, 1),
+ _Q1B(SHARED_LD_REPLAY, 0x0001, B6, REPLAY, 0x00000008, 1, 1),
+ _Q1A(SHARED_ST, 0x0001, B6, LDST, 0x00000004, 1, 1),
+ _Q1B(SHARED_ST_REPLAY, 0x0001, B6, REPLAY, 0x0000000c, 1, 1),
+ _Q1B(SM_CTA_LAUNCHED, 0x0001, B6, WARP, 0x0000001c, 1, 1),
+ _Q1A(THREADS_LAUNCHED, 0x003f, B6, LAUNCH, 0x398a4188, 1, 1),
+ _Q1B(UNCACHED_GLD_TRANSACTIONS, 0x0001, B6, MEM, 0x00000000, 1, 1),
+ _Q1A(WARPS_LAUNCHED, 0x0001, B6, LAUNCH, 0x00000004, 1, 1),
+ _M2AB(IPC, 0x3, B6, EXEC, 0x398, 0xffff, LOGOP, WARP, 0x0, DIV_SUM_M0, 10, 1),
+ _M2AB(IPAC, 0x3, B6, EXEC, 0x398, 0x1, B6, WARP, 0x0, AVG_DIV_MM, 10, 1),
+ _M2A(IPEC, 0x3, B6, EXEC, 0x398, 0xe, LOGOP, EXEC, 0x398, AVG_DIV_MM, 10, 1),
+ _M2A(INST_REPLAY_OHEAD, 0x3, B6, ISSUE, 0x104, 0x3, B6, EXEC, 0x398, REL_SUM_MM, 100, 1),
+ _M2B(MP_OCCUPANCY, 0x3f, B6, WARP, 0x31483104, 0x01, B6, WARP, 0x0, AVG_DIV_MM, 200, 64),
+ _M2B(MP_EFFICIENCY, 0x01, B6, WARP, 0x0, 0xffff, LOGOP, WARP, 0x0, AVG_DIV_M0, 100, 1),
+};
+
+#undef _Q1A
+#undef _Q1B
+#undef _M2A
+#undef _M2B
+
+/* === PERFORMANCE MONITORING COUNTERS for NVC0:NVE4 === */
+/* NOTES:
+ * - MP counters on GF100/GF110 (compute capability 2.0) are buggy
+ * because there is a context-switch problem that we need to fix.
+ * Results might be wrong sometimes, be careful!
+ */
+static const char *nvc0_hw_sm_query_names[] =
+{
+ /* MP counters */
+ "active_cycles",
+ "active_warps",
+ "atom_count",
+ "branch",
+ "divergent_branch",
+ "gld_request",
+ "gred_count",
+ "gst_request",
+ "inst_executed",
+ "inst_issued",
+ "inst_issued1_0",
+ "inst_issued1_1",
+ "inst_issued2_0",
+ "inst_issued2_1",
+ "local_load",
+ "local_store",
+ "prof_trigger_00",
+ "prof_trigger_01",
+ "prof_trigger_02",
+ "prof_trigger_03",
+ "prof_trigger_04",
+ "prof_trigger_05",
+ "prof_trigger_06",
+ "prof_trigger_07",
+ "shared_load",
+ "shared_store",
+ "threads_launched",
+ "thread_inst_executed_0",
+ "thread_inst_executed_1",
+ "thread_inst_executed_2",
+ "thread_inst_executed_3",
+ "warps_launched",
+};
+
+static const uint64_t nvc0_read_hw_sm_counters_code[] =
+{
+ /* mov b32 $r8 $tidx
+ * mov b32 $r9 $physid
+ * mov b32 $r0 $pm0
+ * mov b32 $r1 $pm1
+ * mov b32 $r2 $pm2
+ * mov b32 $r3 $pm3
+ * mov b32 $r4 $pm4
+ * mov b32 $r5 $pm5
+ * mov b32 $r6 $pm6
+ * mov b32 $r7 $pm7
+ * set $p0 0x1 eq u32 $r8 0x0
+ * mov b32 $r10 c0[0x0]
+ * mov b32 $r11 c0[0x4]
+ * ext u32 $r8 $r9 0x414
+ * (not $p0) exit
+ * mul $r8 u32 $r8 u32 48
+ * add b32 $r10 $c $r10 $r8
+ * add b32 $r11 $r11 0x0 $c
+ * mov b32 $r8 c0[0x8]
+ * st b128 wt g[$r10d+0x00] $r0q
+ * st b128 wt g[$r10d+0x10] $r4q
+ * st b32 wt g[$r10d+0x20] $r8
+ * exit */
+ 0x2c00000084021c04ULL,
+ 0x2c0000000c025c04ULL,
+ 0x2c00000010001c04ULL,
+ 0x2c00000014005c04ULL,
+ 0x2c00000018009c04ULL,
+ 0x2c0000001c00dc04ULL,
+ 0x2c00000020011c04ULL,
+ 0x2c00000024015c04ULL,
+ 0x2c00000028019c04ULL,
+ 0x2c0000002c01dc04ULL,
+ 0x190e0000fc81dc03ULL,
+ 0x2800400000029de4ULL,
+ 0x280040001002dde4ULL,
+ 0x7000c01050921c03ULL,
+ 0x80000000000021e7ULL,
+ 0x10000000c0821c02ULL,
+ 0x4801000020a29c03ULL,
+ 0x0800000000b2dc42ULL,
+ 0x2800400020021de4ULL,
+ 0x9400000000a01fc5ULL,
+ 0x9400000040a11fc5ULL,
+ 0x9400000080a21f85ULL,
+ 0x8000000000001de7ULL
+};
+
+#define _C(f, o, g, m, s) { f, NVC0_COMPUTE_MP_PM_OP_MODE_##o, 0, g, m, s }
+#define _Q(n, c) [NVC0_HW_SM_QUERY_##n] = c
+
+/* ==== Compute capability 2.0 (GF100/GF110) ==== */
+static const struct nvc0_hw_sm_query_cfg
+sm20_active_cycles =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0x11, 0x000000ff, 0x00000000),
+ .num_counters = 1,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm20_active_warps =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0x24, 0x000000ff, 0x00000010),
+ .ctr[1] = _C(0xaaaa, LOGOP, 0x24, 0x000000ff, 0x00000020),
+ .ctr[2] = _C(0xaaaa, LOGOP, 0x24, 0x000000ff, 0x00000030),
+ .ctr[3] = _C(0xaaaa, LOGOP, 0x24, 0x000000ff, 0x00000040),
+ .ctr[4] = _C(0xaaaa, LOGOP, 0x24, 0x000000ff, 0x00000050),
+ .ctr[5] = _C(0xaaaa, LOGOP, 0x24, 0x000000ff, 0x00000060),
+ .num_counters = 6,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm20_atom_count =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0x63, 0x000000ff, 0x00000030),
+ .num_counters = 1,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm20_branch =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0x1a, 0x000000ff, 0x00000000),
+ .ctr[1] = _C(0xaaaa, LOGOP, 0x1a, 0x000000ff, 0x00000010),
+ .num_counters = 2,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm20_divergent_branch =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0x19, 0x000000ff, 0x00000020),
+ .ctr[1] = _C(0xaaaa, LOGOP, 0x19, 0x000000ff, 0x00000030),
+ .num_counters = 2,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm20_gld_request =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0x64, 0x000000ff, 0x00000030),
+ .num_counters = 1,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm20_gred_count =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0x63, 0x000000ff, 0x00000040),
+ .num_counters = 1,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm20_gst_request =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0x64, 0x000000ff, 0x00000060),
+ .num_counters = 1,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm20_inst_executed =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0x2d, 0x0000ffff, 0x00001000),
+ .ctr[1] = _C(0xaaaa, LOGOP, 0x2d, 0x0000ffff, 0x00001010),
+ .num_counters = 2,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm20_inst_issued =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0x27, 0x0000ffff, 0x00007060),
+ .ctr[1] = _C(0xaaaa, LOGOP, 0x27, 0x0000ffff, 0x00007070),
+ .num_counters = 2,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm20_local_ld =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0x64, 0x000000ff, 0x00000020),
+ .num_counters = 1,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm20_local_st =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0x64, 0x000000ff, 0x00000050),
+ .num_counters = 1,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm20_prof_trigger_0 =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0x01, 0x000000ff, 0x00000000),
+ .num_counters = 1,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm20_prof_trigger_1 =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0x01, 0x000000ff, 0x00000010),
+ .num_counters = 1,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm20_prof_trigger_2 =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0x01, 0x000000ff, 0x00000020),
+ .num_counters = 1,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm20_prof_trigger_3 =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0x01, 0x000000ff, 0x00000030),
+ .num_counters = 1,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm20_prof_trigger_4 =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0x01, 0x000000ff, 0x00000040),
+ .num_counters = 1,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm20_prof_trigger_5 =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0x01, 0x000000ff, 0x00000050),
+ .num_counters = 1,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm20_prof_trigger_6 =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0x01, 0x000000ff, 0x00000060),
+ .num_counters = 1,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm20_prof_trigger_7 =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0x01, 0x000000ff, 0x00000070),
+ .num_counters = 1,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm20_shared_ld =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0x64, 0x000000ff, 0x00000010),
+ .num_counters = 1,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm20_shared_st =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0x64, 0x000000ff, 0x00000040),
+ .num_counters = 1,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm20_threads_launched =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0x26, 0x000000ff, 0x00000010),
+ .ctr[1] = _C(0xaaaa, LOGOP, 0x26, 0x000000ff, 0x00000020),
+ .ctr[2] = _C(0xaaaa, LOGOP, 0x26, 0x000000ff, 0x00000030),
+ .ctr[3] = _C(0xaaaa, LOGOP, 0x26, 0x000000ff, 0x00000040),
+ .ctr[4] = _C(0xaaaa, LOGOP, 0x26, 0x000000ff, 0x00000050),
+ .ctr[5] = _C(0xaaaa, LOGOP, 0x26, 0x000000ff, 0x00000060),
+ .num_counters = 6,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm20_th_inst_executed_0 =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0x2f, 0x000000ff, 0x00000000),
+ .ctr[1] = _C(0xaaaa, LOGOP, 0x2f, 0x000000ff, 0x00000010),
+ .ctr[2] = _C(0xaaaa, LOGOP, 0x2f, 0x000000ff, 0x00000020),
+ .ctr[3] = _C(0xaaaa, LOGOP, 0x2f, 0x000000ff, 0x00000030),
+ .ctr[4] = _C(0xaaaa, LOGOP, 0x2f, 0x000000ff, 0x00000040),
+ .ctr[5] = _C(0xaaaa, LOGOP, 0x2f, 0x000000ff, 0x00000050),
+ .num_counters = 6,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm20_th_inst_executed_1 =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0x30, 0x000000ff, 0x00000000),
+ .ctr[1] = _C(0xaaaa, LOGOP, 0x30, 0x000000ff, 0x00000010),
+ .ctr[2] = _C(0xaaaa, LOGOP, 0x30, 0x000000ff, 0x00000020),
+ .ctr[3] = _C(0xaaaa, LOGOP, 0x30, 0x000000ff, 0x00000030),
+ .ctr[4] = _C(0xaaaa, LOGOP, 0x30, 0x000000ff, 0x00000040),
+ .ctr[5] = _C(0xaaaa, LOGOP, 0x30, 0x000000ff, 0x00000050),
+ .num_counters = 6,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm20_warps_launched =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0x26, 0x000000ff, 0x00000000),
+ .num_counters = 1,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg *sm20_hw_sm_queries[] =
+{
+ _Q(ACTIVE_CYCLES, &sm20_active_cycles),
+ _Q(ACTIVE_WARPS, &sm20_active_warps),
+ _Q(ATOM_COUNT, &sm20_atom_count),
+ _Q(BRANCH, &sm20_branch),
+ _Q(DIVERGENT_BRANCH, &sm20_divergent_branch),
+ _Q(GLD_REQUEST, &sm20_gld_request),
+ _Q(GRED_COUNT, &sm20_gred_count),
+ _Q(GST_REQUEST, &sm20_gst_request),
+ _Q(INST_EXECUTED, &sm20_inst_executed),
+ _Q(INST_ISSUED, &sm20_inst_issued),
+ _Q(INST_ISSUED1_0, NULL),
+ _Q(INST_ISSUED1_1, NULL),
+ _Q(INST_ISSUED2_0, NULL),
+ _Q(INST_ISSUED2_1, NULL),
+ _Q(LOCAL_LD, &sm20_local_ld),
+ _Q(LOCAL_ST, &sm20_local_st),
+ _Q(PROF_TRIGGER_0, &sm20_prof_trigger_0),
+ _Q(PROF_TRIGGER_1, &sm20_prof_trigger_1),
+ _Q(PROF_TRIGGER_2, &sm20_prof_trigger_2),
+ _Q(PROF_TRIGGER_3, &sm20_prof_trigger_3),
+ _Q(PROF_TRIGGER_4, &sm20_prof_trigger_4),
+ _Q(PROF_TRIGGER_5, &sm20_prof_trigger_5),
+ _Q(PROF_TRIGGER_6, &sm20_prof_trigger_6),
+ _Q(PROF_TRIGGER_7, &sm20_prof_trigger_7),
+ _Q(SHARED_LD, &sm20_shared_ld),
+ _Q(SHARED_ST, &sm20_shared_st),
+ _Q(THREADS_LAUNCHED, &sm20_threads_launched),
+ _Q(TH_INST_EXECUTED_0, &sm20_th_inst_executed_0),
+ _Q(TH_INST_EXECUTED_1, &sm20_th_inst_executed_1),
+ _Q(TH_INST_EXECUTED_2, NULL),
+ _Q(TH_INST_EXECUTED_3, NULL),
+ _Q(WARPS_LAUNCHED, &sm20_warps_launched),
+};
+
+/* ==== Compute capability 2.1 (GF108+ except GF110) ==== */
+static const struct nvc0_hw_sm_query_cfg
+sm21_inst_executed =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0x2d, 0x000000ff, 0x00000000),
+ .ctr[1] = _C(0xaaaa, LOGOP, 0x2d, 0x000000ff, 0x00000010),
+ .ctr[2] = _C(0xaaaa, LOGOP, 0x2d, 0x000000ff, 0x00000020),
+ .num_counters = 3,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm21_inst_issued1_0 =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0x7e, 0x000000ff, 0x00000010),
+ .num_counters = 1,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm21_inst_issued1_1 =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0x7e, 0x000000ff, 0x00000040),
+ .num_counters = 1,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm21_inst_issued2_0 =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0x7e, 0x000000ff, 0x00000020),
+ .num_counters = 1,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm21_inst_issued2_1 =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0x7e, 0x000000ff, 0x00000050),
+ .num_counters = 1,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm21_th_inst_executed_0 =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0xa3, 0x000000ff, 0x00000000),
+ .ctr[1] = _C(0xaaaa, LOGOP, 0xa3, 0x000000ff, 0x00000010),
+ .ctr[2] = _C(0xaaaa, LOGOP, 0xa3, 0x000000ff, 0x00000020),
+ .ctr[3] = _C(0xaaaa, LOGOP, 0xa3, 0x000000ff, 0x00000030),
+ .ctr[4] = _C(0xaaaa, LOGOP, 0xa3, 0x000000ff, 0x00000040),
+ .ctr[5] = _C(0xaaaa, LOGOP, 0xa3, 0x000000ff, 0x00000050),
+ .num_counters = 6,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm21_th_inst_executed_1 =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0xa5, 0x000000ff, 0x00000000),
+ .ctr[1] = _C(0xaaaa, LOGOP, 0xa5, 0x000000ff, 0x00000010),
+ .ctr[2] = _C(0xaaaa, LOGOP, 0xa5, 0x000000ff, 0x00000020),
+ .ctr[3] = _C(0xaaaa, LOGOP, 0xa5, 0x000000ff, 0x00000030),
+ .ctr[4] = _C(0xaaaa, LOGOP, 0xa5, 0x000000ff, 0x00000040),
+ .ctr[5] = _C(0xaaaa, LOGOP, 0xa5, 0x000000ff, 0x00000050),
+ .num_counters = 6,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm21_th_inst_executed_2 =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0xa4, 0x000000ff, 0x00000000),
+ .ctr[1] = _C(0xaaaa, LOGOP, 0xa4, 0x000000ff, 0x00000010),
+ .ctr[2] = _C(0xaaaa, LOGOP, 0xa4, 0x000000ff, 0x00000020),
+ .ctr[3] = _C(0xaaaa, LOGOP, 0xa4, 0x000000ff, 0x00000030),
+ .ctr[4] = _C(0xaaaa, LOGOP, 0xa4, 0x000000ff, 0x00000040),
+ .ctr[5] = _C(0xaaaa, LOGOP, 0xa4, 0x000000ff, 0x00000050),
+ .num_counters = 6,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm21_th_inst_executed_3 =
+{
+ .ctr[0] = _C(0xaaaa, LOGOP, 0xa6, 0x000000ff, 0x00000000),
+ .ctr[1] = _C(0xaaaa, LOGOP, 0xa6, 0x000000ff, 0x00000010),
+ .ctr[2] = _C(0xaaaa, LOGOP, 0xa6, 0x000000ff, 0x00000020),
+ .ctr[3] = _C(0xaaaa, LOGOP, 0xa6, 0x000000ff, 0x00000030),
+ .ctr[4] = _C(0xaaaa, LOGOP, 0xa6, 0x000000ff, 0x00000040),
+ .ctr[5] = _C(0xaaaa, LOGOP, 0xa6, 0x000000ff, 0x00000050),
+ .num_counters = 6,
+ .op = NVC0_COUNTER_OPn_SUM,
+ .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg *sm21_hw_sm_queries[] =
+{
+ _Q(ACTIVE_CYCLES, &sm20_active_cycles),
+ _Q(ACTIVE_WARPS, &sm20_active_warps),
+ _Q(ATOM_COUNT, &sm20_atom_count),
+ _Q(BRANCH, &sm20_branch),
+ _Q(DIVERGENT_BRANCH, &sm20_divergent_branch),
+ _Q(GLD_REQUEST, &sm20_gld_request),
+ _Q(GRED_COUNT, &sm20_gred_count),
+ _Q(GST_REQUEST, &sm20_gst_request),
+ _Q(INST_EXECUTED, &sm21_inst_executed),
+ _Q(INST_ISSUED, NULL),
+ _Q(INST_ISSUED1_0, &sm21_inst_issued1_0),
+ _Q(INST_ISSUED1_1, &sm21_inst_issued1_1),
+ _Q(INST_ISSUED2_0, &sm21_inst_issued2_0),
+ _Q(INST_ISSUED2_1, &sm21_inst_issued2_1),
+ _Q(LOCAL_LD, &sm20_local_ld),
+ _Q(LOCAL_ST, &sm20_local_st),
+ _Q(PROF_TRIGGER_0, &sm20_prof_trigger_0),
+ _Q(PROF_TRIGGER_1, &sm20_prof_trigger_1),
+ _Q(PROF_TRIGGER_2, &sm20_prof_trigger_2),
+ _Q(PROF_TRIGGER_3, &sm20_prof_trigger_3),
+ _Q(PROF_TRIGGER_4, &sm20_prof_trigger_4),
+ _Q(PROF_TRIGGER_5, &sm20_prof_trigger_5),
+ _Q(PROF_TRIGGER_6, &sm20_prof_trigger_6),
+ _Q(PROF_TRIGGER_7, &sm20_prof_trigger_7),
+ _Q(SHARED_LD, &sm20_shared_ld),
+ _Q(SHARED_ST, &sm20_shared_st),
+ _Q(THREADS_LAUNCHED, &sm20_threads_launched),
+ _Q(TH_INST_EXECUTED_0, &sm21_th_inst_executed_0),
+ _Q(TH_INST_EXECUTED_1, &sm21_th_inst_executed_1),
+ _Q(TH_INST_EXECUTED_2, &sm21_th_inst_executed_2),
+ _Q(TH_INST_EXECUTED_3, &sm21_th_inst_executed_3),
+ _Q(WARPS_LAUNCHED, &sm20_warps_launched),
+};
+
+#undef _Q
+#undef _C
+
+static inline const struct nvc0_hw_sm_query_cfg **
+nvc0_hw_sm_get_queries(struct nvc0_screen *screen)
+{
+ struct nouveau_device *dev = screen->base.device;
+
+ if (dev->chipset == 0xc0 || dev->chipset == 0xc8)
+ return sm20_hw_sm_queries;
+ return sm21_hw_sm_queries;
+}
+
+static const struct nvc0_hw_sm_query_cfg *
+nvc0_hw_sm_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
+{
+ struct nvc0_screen *screen = nvc0->screen;
+ struct nvc0_query *q = &hq->base;
+
+ if (screen->base.class_3d >= NVE4_3D_CLASS)
+ return &nve4_hw_sm_queries[q->type - PIPE_QUERY_DRIVER_SPECIFIC];
+
+ if (q->type >= NVC0_HW_SM_QUERY(0) && q->type <= NVC0_HW_SM_QUERY_LAST) {
+ const struct nvc0_hw_sm_query_cfg **queries =
+ nvc0_hw_sm_get_queries(screen);
+ return queries[q->type - NVC0_HW_SM_QUERY(0)];
+ }
+ debug_printf("invalid query type: %d\n", q->type);
+ return NULL;
+}
+
+static void
+nvc0_hw_sm_destroy_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
+{
+ struct nvc0_query *q = &hq->base;
+ q->funcs->destroy_query(nvc0, q);
+}
+
+static boolean
+nve4_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
+{
+ struct nvc0_screen *screen = nvc0->screen;
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_hw_sm_query *hsq = nvc0_hw_sm_query(hq);
+ const struct nvc0_hw_sm_query_cfg *cfg;
+ unsigned i, c;
+ unsigned num_ab[2] = { 0, 0 };
+
+ cfg = nvc0_hw_sm_query_get_cfg(nvc0, hq);
+
+ /* check if we have enough free counter slots */
+ for (i = 0; i < cfg->num_counters; ++i)
+ num_ab[cfg->ctr[i].sig_dom]++;
+
+ if (screen->pm.num_hw_sm_active[0] + num_ab[0] > 4 ||
+ screen->pm.num_hw_sm_active[1] + num_ab[1] > 4) {
+ NOUVEAU_ERR("Not enough free MP counter slots !\n");
+ return false;
+ }
+
+ assert(cfg->num_counters <= 4);
+ PUSH_SPACE(push, 4 * 8 * + 6);
+
+ if (!screen->pm.mp_counters_enabled) {
+ screen->pm.mp_counters_enabled = true;
+ BEGIN_NVC0(push, SUBC_SW(0x06ac), 1);
+ PUSH_DATA (push, 0x1fcb);
+ }
+
+ /* set sequence field to 0 (used to check if result is available) */
+ for (i = 0; i < screen->mp_count; ++i)
+ hq->data[i * 10 + 10] = 0;
+ hq->sequence++;
+
+ for (i = 0; i < cfg->num_counters; ++i) {
+ const unsigned d = cfg->ctr[i].sig_dom;
+
+ if (!screen->pm.num_hw_sm_active[d]) {
+ uint32_t m = (1 << 22) | (1 << (7 + (8 * !d)));
+ if (screen->pm.num_hw_sm_active[!d])
+ m |= 1 << (7 + (8 * d));
+ BEGIN_NVC0(push, SUBC_SW(0x0600), 1);
+ PUSH_DATA (push, m);
+ }
+ screen->pm.num_hw_sm_active[d]++;
+
+ for (c = d * 4; c < (d * 4 + 4); ++c) {
+ if (!screen->pm.mp_counter[c]) {
+ hsq->ctr[i] = c;
+ screen->pm.mp_counter[c] = hsq;
+ break;
+ }
+ }
+ assert(c <= (d * 4 + 3)); /* must succeed, already checked for space */
+
+ /* configure and reset the counter(s) */
+ if (d == 0)
+ BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_A_SIGSEL(c & 3)), 1);
+ else
+ BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_B_SIGSEL(c & 3)), 1);
+ PUSH_DATA (push, cfg->ctr[i].sig_sel);
+ BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SRCSEL(c)), 1);
+ PUSH_DATA (push, cfg->ctr[i].src_sel + 0x2108421 * (c & 3));
+ BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 1);
+ PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
+ BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SET(c)), 1);
+ PUSH_DATA (push, 0);
+ }
+ return true;
+}
+
+/* Begin a MP (SM) performance counter query on Fermi (pre-NVE4 3D class).
+ * Kepler and newer chips are delegated to nve4_hw_sm_begin_query().
+ * Returns false when the 8 shared hardware counter slots cannot hold
+ * this query's counters. */
+static boolean
+nvc0_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
+{
+ struct nvc0_screen *screen = nvc0->screen;
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_hw_sm_query *hsq = nvc0_hw_sm_query(hq);
+ const struct nvc0_hw_sm_query_cfg *cfg;
+ unsigned i, c;
+
+ if (screen->base.class_3d >= NVE4_3D_CLASS)
+ return nve4_hw_sm_begin_query(nvc0, hq);
+
+ cfg = nvc0_hw_sm_query_get_cfg(nvc0, hq);
+
+ /* check if we have enough free counter slots */
+ if (screen->pm.num_hw_sm_active[0] + cfg->num_counters > 8) {
+ NOUVEAU_ERR("Not enough free MP counter slots !\n");
+ return false;
+ }
+
+ assert(cfg->num_counters <= 8);
+ PUSH_SPACE(push, 8 * 8 + 2);
+
+ /* set sequence field to 0 (used to check if result is available) */
+ for (i = 0; i < screen->mp_count; ++i) {
+ const unsigned b = (0x30 / 4) * i;
+ hq->data[b + 8] = 0;
+ }
+ hq->sequence++;
+
+ for (i = 0; i < cfg->num_counters; ++i) {
+ uint32_t mask_sel = 0x00000000;
+
+ /* poke the SW subchannel once when the first counter goes live */
+ if (!screen->pm.num_hw_sm_active[0]) {
+ BEGIN_NVC0(push, SUBC_SW(0x0600), 1);
+ PUSH_DATA (push, 0x80000000);
+ }
+ screen->pm.num_hw_sm_active[0]++;
+
+ /* grab the first free slot; the capacity check above guarantees one */
+ for (c = 0; c < 8; ++c) {
+ if (!screen->pm.mp_counter[c]) {
+ hsq->ctr[i] = c;
+ screen->pm.mp_counter[c] = hsq;
+ break;
+ }
+ }
+
+ /* Oddly enough, the signal id depends on the slot selected on Fermi but
+ * not on Kepler. Fortunately, the signal ids are just offset by the
+ * slot id! */
+ mask_sel |= c;
+ mask_sel |= (c << 8);
+ mask_sel |= (c << 16);
+ mask_sel |= (c << 24);
+ mask_sel &= cfg->ctr[i].src_mask;
+
+ /* configure and reset the counter(s) */
+ BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SIGSEL(c)), 1);
+ PUSH_DATA (push, cfg->ctr[i].sig_sel);
+ BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SRCSEL(c)), 1);
+ PUSH_DATA (push, cfg->ctr[i].src_sel | mask_sel);
+ BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(c)), 1);
+ PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
+ BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SET(c)), 1);
+ PUSH_DATA (push, 0);
+ }
+ return true;
+}
+
+/* End a MP performance counter query (Fermi and Kepler). The counters are
+ * read back on-GPU by launching a small canned compute program that writes
+ * the results plus a sequence word into the query's buffer object. Counting
+ * is paused around the readback, this query's slots are released, and any
+ * remaining active counters are re-armed afterwards. */
+static void
+nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
+{
+ struct nvc0_screen *screen = nvc0->screen;
+ struct pipe_context *pipe = &nvc0->base.pipe;
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ const bool is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
+ struct nvc0_hw_sm_query *hsq = nvc0_hw_sm_query(hq);
+ uint32_t mask;
+ uint32_t input[3];
+ const uint block[3] = { 32, is_nve4 ? 4 : 1, 1 };
+ const uint grid[3] = { screen->mp_count, screen->gpc_count, 1 };
+ unsigned c;
+
+ /* lazily build the readback compute state object from precompiled code */
+ if (unlikely(!screen->pm.prog)) {
+ struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program);
+ prog->type = PIPE_SHADER_COMPUTE;
+ prog->translated = true;
+ prog->num_gprs = 14;
+ prog->parm_size = 12;
+ if (is_nve4) {
+ prog->code = (uint32_t *)nve4_read_hw_sm_counters_code;
+ prog->code_size = sizeof(nve4_read_hw_sm_counters_code);
+ } else {
+ prog->code = (uint32_t *)nvc0_read_hw_sm_counters_code;
+ prog->code_size = sizeof(nvc0_read_hw_sm_counters_code);
+ }
+ screen->pm.prog = prog;
+ }
+
+ /* disable all counting */
+ PUSH_SPACE(push, 8);
+ for (c = 0; c < 8; ++c)
+ if (screen->pm.mp_counter[c]) {
+ if (is_nve4) {
+ IMMED_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 0);
+ } else {
+ IMMED_NVC0(push, NVC0_COMPUTE(MP_PM_OP(c)), 0);
+ }
+ }
+ /* release counters for this query */
+ for (c = 0; c < 8; ++c) {
+ if (screen->pm.mp_counter[c] == hsq) {
+ uint8_t d = is_nve4 ? c / 4 : 0; /* only one domain for NVC0:NVE4 */
+ screen->pm.num_hw_sm_active[d]--;
+ screen->pm.mp_counter[c] = NULL;
+ }
+ }
+
+ BCTX_REFN_bo(nvc0->bufctx_cp, CP_QUERY, NOUVEAU_BO_GART | NOUVEAU_BO_WR,
+ hq->bo);
+
+ PUSH_SPACE(push, 1);
+ IMMED_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 0);
+
+ /* launch the readback program; inputs are the 64-bit destination address
+ * (lo/hi words) and the sequence number to stamp into the buffer */
+ pipe->bind_compute_state(pipe, screen->pm.prog);
+ input[0] = (hq->bo->offset + hq->base_offset);
+ input[1] = (hq->bo->offset + hq->base_offset) >> 32;
+ input[2] = hq->sequence;
+ pipe->launch_grid(pipe, block, grid, 0, input);
+
+ nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_QUERY);
+
+ /* re-activate other counters */
+ PUSH_SPACE(push, 16);
+ mask = 0;
+ for (c = 0; c < 8; ++c) {
+ const struct nvc0_hw_sm_query_cfg *cfg;
+ unsigned i;
+
+ hsq = screen->pm.mp_counter[c];
+ if (!hsq)
+ continue;
+
+ cfg = nvc0_hw_sm_query_get_cfg(nvc0, &hsq->base);
+ for (i = 0; i < cfg->num_counters; ++i) {
+ if (mask & (1 << hsq->ctr[i]))
+ break;
+ mask |= 1 << hsq->ctr[i];
+ if (is_nve4) {
+ BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(hsq->ctr[i])), 1);
+ } else {
+ BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(hsq->ctr[i])), 1);
+ }
+ PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
+ }
+ }
+}
+
+/* Read Fermi MP counter results from the query buffer into count[mp][ctr].
+ * Each MP occupies 0x30 bytes: 8 counter words followed by a sequence word
+ * (data[b + 8]) used to detect readback completion. Returns false when the
+ * results are not ready and waiting is disallowed, or the bo wait fails. */
+static inline bool
+nvc0_hw_sm_query_read_data(uint32_t count[32][8],
+ struct nvc0_context *nvc0, bool wait,
+ struct nvc0_hw_query *hq,
+ const struct nvc0_hw_sm_query_cfg *cfg,
+ unsigned mp_count)
+{
+ struct nvc0_hw_sm_query *hsq = nvc0_hw_sm_query(hq);
+ unsigned p, c;
+
+ for (p = 0; p < mp_count; ++p) {
+ const unsigned b = (0x30 / 4) * p;
+
+ for (c = 0; c < cfg->num_counters; ++c) {
+ if (hq->data[b + 8] != hq->sequence) {
+ if (!wait)
+ return false;
+ if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nvc0->base.client))
+ return false;
+ }
+ /* NOTE(review): the (1 << c) factor scales each counter by a
+ * power of two of its index -- confirm this matches how the
+ * readback program packs multi-counter results. */
+ count[p][c] = hq->data[b + hsq->ctr[c]] * (1 << c);
+ }
+ }
+ return true;
+}
+
+/* Read Kepler MP counter results from the query buffer into count[mp][ctr].
+ * Each MP occupies 0x60 bytes. Counters 0-3 live per warp scheduler (4
+ * domains, summed across d); counters 4-7 are MP-global and read once from
+ * the block at offset 0x40. Sequence words at 0x50..0x5c gate completion.
+ * Returns false when results are not ready and waiting is disallowed. */
+static inline bool
+nve4_hw_sm_query_read_data(uint32_t count[32][8],
+ struct nvc0_context *nvc0, bool wait,
+ struct nvc0_hw_query *hq,
+ const struct nvc0_hw_sm_query_cfg *cfg,
+ unsigned mp_count)
+{
+ struct nvc0_hw_sm_query *hsq = nvc0_hw_sm_query(hq);
+ unsigned p, c, d;
+
+ for (p = 0; p < mp_count; ++p) {
+ const unsigned b = (0x60 / 4) * p;
+
+ for (c = 0; c < cfg->num_counters; ++c) {
+ count[p][c] = 0;
+ /* ctr >= 4: single MP-global read; ctr < 4: sum 4 scheduler domains */
+ for (d = 0; d < ((hsq->ctr[c] & ~3) ? 1 : 4); ++d) {
+ if (hq->data[b + 20 + d] != hq->sequence) {
+ if (!wait)
+ return false;
+ if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nvc0->base.client))
+ return false;
+ }
+ if (hsq->ctr[c] & ~0x3)
+ count[p][c] = hq->data[b + 16 + (hsq->ctr[c] & 3)];
+ else
+ count[p][c] += hq->data[b + d * 4 + hsq->ctr[c]];
+ }
+ }
+ }
+ return true;
+}
+
+/* Metric calculations:
+ * sum(x) ... sum of x over all MPs
+ * avg(x) ... average of x over all MPs
+ *
+ * IPC : sum(inst_executed) / clock
+ * INST_REPLAY_OHEAD: (sum(inst_issued) - sum(inst_executed)) / sum(inst_issued)
+ * MP_OCCUPANCY : avg((active_warps / 64) / active_cycles)
+ * MP_EFFICIENCY : avg(active_cycles / clock)
+ *
+ * NOTE: Interpretation of IPC requires knowledge of MP count.
+ */
+/* Combine the per-MP raw counter values into one 64-bit result according
+ * to the query config's aggregation op, then apply the norm[0]/norm[1]
+ * normalization factor. Returns false if the raw data is not ready. */
+static boolean
+nvc0_hw_sm_get_query_result(struct nvc0_context *nvc0, struct nvc0_hw_query *hq,
+ boolean wait, union pipe_query_result *result)
+{
+ uint32_t count[32][8];
+ uint64_t value = 0;
+ unsigned mp_count = MIN2(nvc0->screen->mp_count_compute, 32);
+ unsigned p, c;
+ const struct nvc0_hw_sm_query_cfg *cfg;
+ bool ret;
+
+ cfg = nvc0_hw_sm_query_get_cfg(nvc0, hq);
+
+ if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
+ ret = nve4_hw_sm_query_read_data(count, nvc0, wait, hq, cfg, mp_count);
+ else
+ ret = nvc0_hw_sm_query_read_data(count, nvc0, wait, hq, cfg, mp_count);
+ if (!ret)
+ return false;
+
+ if (cfg->op == NVC0_COUNTER_OPn_SUM) {
+ for (c = 0; c < cfg->num_counters; ++c)
+ for (p = 0; p < mp_count; ++p)
+ value += count[p][c];
+ value = (value * cfg->norm[0]) / cfg->norm[1];
+ } else
+ if (cfg->op == NVC0_COUNTER_OPn_OR) {
+ uint32_t v = 0;
+ for (c = 0; c < cfg->num_counters; ++c)
+ for (p = 0; p < mp_count; ++p)
+ v |= count[p][c];
+ value = ((uint64_t)v * cfg->norm[0]) / cfg->norm[1];
+ } else
+ if (cfg->op == NVC0_COUNTER_OPn_AND) {
+ uint32_t v = ~0;
+ for (c = 0; c < cfg->num_counters; ++c)
+ for (p = 0; p < mp_count; ++p)
+ v &= count[p][c];
+ value = ((uint64_t)v * cfg->norm[0]) / cfg->norm[1];
+ } else
+ if (cfg->op == NVC0_COUNTER_OP2_REL_SUM_MM) {
+ /* relative difference of two summed counters: (sum0 - sum1) / sum0 */
+ uint64_t v[2] = { 0, 0 };
+ for (p = 0; p < mp_count; ++p) {
+ v[0] += count[p][0];
+ v[1] += count[p][1];
+ }
+ if (v[0])
+ value = ((v[0] - v[1]) * cfg->norm[0]) / (v[0] * cfg->norm[1]);
+ } else
+ if (cfg->op == NVC0_COUNTER_OP2_DIV_SUM_M0) {
+ /* sum of counter 0 divided by counter 1 of MP 0 */
+ for (p = 0; p < mp_count; ++p)
+ value += count[p][0];
+ if (count[0][1])
+ value = (value * cfg->norm[0]) / (count[0][1] * cfg->norm[1]);
+ else
+ value = 0;
+ } else
+ if (cfg->op == NVC0_COUNTER_OP2_AVG_DIV_MM) {
+ /* per-MP ratio of counters 0/1, averaged over MPs with nonzero c0 */
+ unsigned mp_used = 0;
+ for (p = 0; p < mp_count; ++p, mp_used += !!count[p][0])
+ if (count[p][1])
+ value += (count[p][0] * cfg->norm[0]) / count[p][1];
+ if (mp_used)
+ value /= (uint64_t)mp_used * cfg->norm[1];
+ } else
+ if (cfg->op == NVC0_COUNTER_OP2_AVG_DIV_M0) {
+ unsigned mp_used = 0;
+ for (p = 0; p < mp_count; ++p, mp_used += !!count[p][0])
+ value += count[p][0];
+ if (count[0][1] && mp_used) {
+ value *= cfg->norm[0];
+ value /= (uint64_t)count[0][1] * mp_used * cfg->norm[1];
+ } else {
+ value = 0;
+ }
+ }
+
+ *(uint64_t *)result = value;
+ return true;
+}
+
+/* Vtable wiring SM performance counter queries into the hw query layer. */
+static const struct nvc0_hw_query_funcs hw_sm_query_funcs = {
+ .destroy_query = nvc0_hw_sm_destroy_query,
+ .begin_query = nvc0_hw_sm_begin_query,
+ .end_query = nvc0_hw_sm_end_query,
+ .get_query_result = nvc0_hw_sm_get_query_result,
+};
+
+/* Create a hardware SM performance counter query. Returns NULL when the
+ * kernel is too old (DRM < 1.1.1), the type is not a SM counter query, or
+ * allocation fails. The query buffer is sized per the chip's result layout
+ * described below. */
+struct nvc0_hw_query *
+nvc0_hw_sm_create_query(struct nvc0_context *nvc0, unsigned type)
+{
+ struct nvc0_screen *screen = nvc0->screen;
+ struct nvc0_hw_sm_query *hsq;
+ struct nvc0_hw_query *hq;
+ unsigned space;
+
+ if (nvc0->screen->base.device->drm_version < 0x01000101)
+ return NULL;
+
+ if ((type < NVE4_HW_SM_QUERY(0) || type > NVE4_HW_SM_QUERY_LAST) &&
+ (type < NVC0_HW_SM_QUERY(0) || type > NVC0_HW_SM_QUERY_LAST))
+ return NULL;
+
+ hsq = CALLOC_STRUCT(nvc0_hw_sm_query);
+ if (!hsq)
+ return NULL;
+
+ hq = &hsq->base;
+ hq->funcs = &hw_sm_query_funcs;
+ hq->base.type = type;
+
+ if (screen->base.class_3d >= NVE4_3D_CLASS) {
+ /* for each MP:
+ * [00] = WS0.C0
+ * [04] = WS0.C1
+ * [08] = WS0.C2
+ * [0c] = WS0.C3
+ * [24] = WS2.C1
+ * [28] = WS2.C2
+ * [2c] = WS2.C3
+ * [30] = WS3.C0
+ * [34] = WS3.C1
+ * [38] = WS3.C2
+ * [3c] = WS3.C3
+ * [40] = MP.C4
+ * [44] = MP.C5
+ * [48] = MP.C6
+ * [4c] = MP.C7
+ * [50] = WS0.sequence
+ * [54] = WS1.sequence
+ * [58] = WS2.sequence
+ * [5c] = WS3.sequence
+ */
+ space = (4 * 4 + 4 + 4) * nvc0->screen->mp_count * sizeof(uint32_t);
+ } else {
+ /*
+ * Note that padding is used to align memory access to 128 bits.
+ *
+ * for each MP:
+ * [00] = MP.C0
+ * [04] = MP.C1
+ * [08] = MP.C2
+ * [0c] = MP.C3
+ * [10] = MP.C4
+ * [14] = MP.C5
+ * [18] = MP.C6
+ * [1c] = MP.C7
+ * [20] = MP.sequence
+ * [24] = padding
+ * [28] = padding
+ * [2c] = padding
+ */
+ space = (8 + 1 + 3) * nvc0->screen->mp_count * sizeof(uint32_t);
+ }
+
+ /* hq == &hsq->base, so freeing hq releases the whole hsq allocation */
+ if (!nvc0_hw_query_allocate(nvc0, &hq->base, space)) {
+ FREE(hq);
+ return NULL;
+ }
+
+ return hq;
+}
+
+/* Map an exposed (dense) query index onto the sparse NVC0 query id space,
+ * skipping entries whose config pointer is NULL (unsupported on this
+ * chipset). Returns the adjusted query id. */
+static int
+nvc0_hw_sm_get_next_query_id(const struct nvc0_hw_sm_query_cfg **queries,
+ unsigned id)
+{
+ unsigned i, next = 0;
+
+ for (i = 0; i < NVC0_HW_SM_QUERY_COUNT; i++) {
+ if (!queries[i]) {
+ next++;
+ } else
+ /* NOTE(review): queries[id + next] can index past the table when
+ * id is not bounded by the supported-query count -- callers appear
+ * to clamp id via the reported count; confirm. */
+ if (i >= id && queries[id + next]) {
+ break;
+ }
+ }
+ return id + next;
+}
+
+/* Enumerate SM performance counter queries for the driver query interface.
+ * With info == NULL, returns how many queries this screen exposes (zero
+ * when compute or the kernel interface is unavailable). Otherwise fills
+ * *info for query number id and returns 1 on success, 0 otherwise. */
+int
+nvc0_hw_sm_get_driver_query_info(struct nvc0_screen *screen, unsigned id,
+ struct pipe_driver_query_info *info)
+{
+ int count = 0;
+
+ if (screen->base.device->drm_version >= 0x01000101) {
+ if (screen->compute) {
+ if (screen->base.class_3d == NVE4_3D_CLASS) {
+ count += NVE4_HW_SM_QUERY_COUNT;
+ } else
+ if (screen->base.class_3d < NVE4_3D_CLASS) {
+ /* Fermi: only count queries actually supported on this chipset */
+ const struct nvc0_hw_sm_query_cfg **queries =
+ nvc0_hw_sm_get_queries(screen);
+ unsigned i;
+
+ for (i = 0; i < NVC0_HW_SM_QUERY_COUNT; i++) {
+ if (queries[i])
+ count++;
+ }
+ }
+ }
+ }
+
+ if (!info)
+ return count;
+
+ if (id < count) {
+ if (screen->compute) {
+ if (screen->base.class_3d == NVE4_3D_CLASS) {
+ info->name = nve4_hw_sm_query_names[id];
+ info->query_type = NVE4_HW_SM_QUERY(id);
+ info->max_value.u64 =
+ (id < NVE4_HW_SM_QUERY_METRIC_MP_OCCUPANCY) ? 0 : 100;
+ info->group_id = NVC0_HW_SM_QUERY_GROUP;
+ return 1;
+ } else
+ if (screen->base.class_3d < NVE4_3D_CLASS) {
+ const struct nvc0_hw_sm_query_cfg **queries =
+ nvc0_hw_sm_get_queries(screen);
+
+ /* skip over unsupported entries to find the real query id */
+ id = nvc0_hw_sm_get_next_query_id(queries, id);
+ info->name = nvc0_hw_sm_query_names[id];
+ info->query_type = NVC0_HW_SM_QUERY(id);
+ info->group_id = NVC0_HW_SM_QUERY_GROUP;
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.h b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.h
new file mode 100644
index 00000000000..26bde0c3e0d
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.h
@@ -0,0 +1,120 @@
+#ifndef __NVC0_QUERY_HW_SM_H__
+#define __NVC0_QUERY_HW_SM_H__
+
+#include "nvc0_query_hw.h"
+
+/* A hardware SM (MP performance counter) query; extends nvc0_hw_query with
+ * the global counter slot assigned to each of its counters at begin time. */
+struct nvc0_hw_sm_query {
+ struct nvc0_hw_query base;
+ uint8_t ctr[8];
+};
+
+/* Downcast helper; valid because base is the first member. */
+static inline struct nvc0_hw_sm_query *
+nvc0_hw_sm_query(struct nvc0_hw_query *hq)
+{
+ return (struct nvc0_hw_sm_query *)hq;
+}
+
+/*
+ * Performance counter queries:
+ */
+#define NVE4_HW_SM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + (i))
+#define NVE4_HW_SM_QUERY_LAST NVE4_HW_SM_QUERY(NVE4_HW_SM_QUERY_COUNT - 1)
+enum nve4_hw_sm_queries
+{
+ NVE4_HW_SM_QUERY_ACTIVE_CYCLES = 0,
+ NVE4_HW_SM_QUERY_ACTIVE_WARPS,
+ NVE4_HW_SM_QUERY_ATOM_COUNT,
+ NVE4_HW_SM_QUERY_BRANCH,
+ NVE4_HW_SM_QUERY_DIVERGENT_BRANCH,
+ NVE4_HW_SM_QUERY_GLD_REQUEST,
+ NVE4_HW_SM_QUERY_GLD_MEM_DIV_REPLAY,
+ NVE4_HW_SM_QUERY_GST_TRANSACTIONS,
+ NVE4_HW_SM_QUERY_GST_MEM_DIV_REPLAY,
+ NVE4_HW_SM_QUERY_GRED_COUNT,
+ NVE4_HW_SM_QUERY_GST_REQUEST,
+ NVE4_HW_SM_QUERY_INST_EXECUTED,
+ NVE4_HW_SM_QUERY_INST_ISSUED,
+ NVE4_HW_SM_QUERY_INST_ISSUED1,
+ NVE4_HW_SM_QUERY_INST_ISSUED2,
+ NVE4_HW_SM_QUERY_L1_GLD_HIT,
+ NVE4_HW_SM_QUERY_L1_GLD_MISS,
+ NVE4_HW_SM_QUERY_L1_LOCAL_LD_HIT,
+ NVE4_HW_SM_QUERY_L1_LOCAL_LD_MISS,
+ NVE4_HW_SM_QUERY_L1_LOCAL_ST_HIT,
+ NVE4_HW_SM_QUERY_L1_LOCAL_ST_MISS,
+ NVE4_HW_SM_QUERY_L1_SHARED_LD_TRANSACTIONS,
+ NVE4_HW_SM_QUERY_L1_SHARED_ST_TRANSACTIONS,
+ NVE4_HW_SM_QUERY_LOCAL_LD,
+ NVE4_HW_SM_QUERY_LOCAL_LD_TRANSACTIONS,
+ NVE4_HW_SM_QUERY_LOCAL_ST,
+ NVE4_HW_SM_QUERY_LOCAL_ST_TRANSACTIONS,
+ NVE4_HW_SM_QUERY_PROF_TRIGGER_0,
+ NVE4_HW_SM_QUERY_PROF_TRIGGER_1,
+ NVE4_HW_SM_QUERY_PROF_TRIGGER_2,
+ NVE4_HW_SM_QUERY_PROF_TRIGGER_3,
+ NVE4_HW_SM_QUERY_PROF_TRIGGER_4,
+ NVE4_HW_SM_QUERY_PROF_TRIGGER_5,
+ NVE4_HW_SM_QUERY_PROF_TRIGGER_6,
+ NVE4_HW_SM_QUERY_PROF_TRIGGER_7,
+ NVE4_HW_SM_QUERY_SHARED_LD,
+ NVE4_HW_SM_QUERY_SHARED_LD_REPLAY,
+ NVE4_HW_SM_QUERY_SHARED_ST,
+ NVE4_HW_SM_QUERY_SHARED_ST_REPLAY,
+ NVE4_HW_SM_QUERY_SM_CTA_LAUNCHED,
+ NVE4_HW_SM_QUERY_THREADS_LAUNCHED,
+ NVE4_HW_SM_QUERY_UNCACHED_GLD_TRANSACTIONS,
+ NVE4_HW_SM_QUERY_WARPS_LAUNCHED,
+ NVE4_HW_SM_QUERY_METRIC_IPC,
+ NVE4_HW_SM_QUERY_METRIC_IPAC,
+ NVE4_HW_SM_QUERY_METRIC_IPEC,
+ NVE4_HW_SM_QUERY_METRIC_MP_OCCUPANCY,
+ NVE4_HW_SM_QUERY_METRIC_MP_EFFICIENCY,
+ NVE4_HW_SM_QUERY_METRIC_INST_REPLAY_OHEAD,
+ NVE4_HW_SM_QUERY_COUNT
+};
+
+#define NVC0_HW_SM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + 2048 + (i))
+#define NVC0_HW_SM_QUERY_LAST NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_COUNT - 1)
+enum nvc0_hw_sm_queries
+{
+ NVC0_HW_SM_QUERY_ACTIVE_CYCLES = 0,
+ NVC0_HW_SM_QUERY_ACTIVE_WARPS,
+ NVC0_HW_SM_QUERY_ATOM_COUNT,
+ NVC0_HW_SM_QUERY_BRANCH,
+ NVC0_HW_SM_QUERY_DIVERGENT_BRANCH,
+ NVC0_HW_SM_QUERY_GLD_REQUEST,
+ NVC0_HW_SM_QUERY_GRED_COUNT,
+ NVC0_HW_SM_QUERY_GST_REQUEST,
+ NVC0_HW_SM_QUERY_INST_EXECUTED,
+ NVC0_HW_SM_QUERY_INST_ISSUED,
+ NVC0_HW_SM_QUERY_INST_ISSUED1_0,
+ NVC0_HW_SM_QUERY_INST_ISSUED1_1,
+ NVC0_HW_SM_QUERY_INST_ISSUED2_0,
+ NVC0_HW_SM_QUERY_INST_ISSUED2_1,
+ NVC0_HW_SM_QUERY_LOCAL_LD,
+ NVC0_HW_SM_QUERY_LOCAL_ST,
+ NVC0_HW_SM_QUERY_PROF_TRIGGER_0,
+ NVC0_HW_SM_QUERY_PROF_TRIGGER_1,
+ NVC0_HW_SM_QUERY_PROF_TRIGGER_2,
+ NVC0_HW_SM_QUERY_PROF_TRIGGER_3,
+ NVC0_HW_SM_QUERY_PROF_TRIGGER_4,
+ NVC0_HW_SM_QUERY_PROF_TRIGGER_5,
+ NVC0_HW_SM_QUERY_PROF_TRIGGER_6,
+ NVC0_HW_SM_QUERY_PROF_TRIGGER_7,
+ NVC0_HW_SM_QUERY_SHARED_LD,
+ NVC0_HW_SM_QUERY_SHARED_ST,
+ NVC0_HW_SM_QUERY_THREADS_LAUNCHED,
+ NVC0_HW_SM_QUERY_TH_INST_EXECUTED_0,
+ NVC0_HW_SM_QUERY_TH_INST_EXECUTED_1,
+ NVC0_HW_SM_QUERY_TH_INST_EXECUTED_2,
+ NVC0_HW_SM_QUERY_TH_INST_EXECUTED_3,
+ NVC0_HW_SM_QUERY_WARPS_LAUNCHED,
+ NVC0_HW_SM_QUERY_COUNT
+};
+
+struct nvc0_hw_query *
+nvc0_hw_sm_create_query(struct nvc0_context *, unsigned);
+int
+nvc0_hw_sm_get_driver_query_info(struct nvc0_screen *, unsigned,
+ struct pipe_driver_query_info *);
+#endif
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_sw.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_sw.c
new file mode 100644
index 00000000000..cd24618d564
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_sw.c
@@ -0,0 +1,162 @@
+/*
+ * Copyright 2011 Christoph Bumiller
+ * Copyright 2015 Samuel Pitoiset
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nvc0/nvc0_context.h"
+
+#include "nvc0_query_sw.h"
+
+/* === DRIVER STATISTICS === */
+
+#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
+
+/* Display names for the driver statistics; order must match the
+ * NVC0_SW_QUERY_DRV_STAT_* enum in nvc0_query_sw.h. */
+static const char *nvc0_sw_query_drv_stat_names[] =
+{
+ "drv-tex_obj_current_count",
+ "drv-tex_obj_current_bytes",
+ "drv-buf_obj_current_count",
+ "drv-buf_obj_current_bytes_vid",
+ "drv-buf_obj_current_bytes_sys",
+ "drv-tex_transfers_rd",
+ "drv-tex_transfers_wr",
+ "drv-tex_copy_count",
+ "drv-tex_blit_count",
+ "drv-tex_cache_flush_count",
+ "drv-buf_transfers_rd",
+ "drv-buf_transfers_wr",
+ "drv-buf_read_bytes_staging_vid",
+ "drv-buf_write_bytes_direct",
+ "drv-buf_write_bytes_staging_vid",
+ "drv-buf_write_bytes_staging_sys",
+ "drv-buf_copy_bytes",
+ "drv-buf_non_kernel_fence_sync_count",
+ "drv-any_non_kernel_fence_sync_count",
+ "drv-query_sync_count",
+ "drv-gpu_serialize_count",
+ "drv-draw_calls_array",
+ "drv-draw_calls_indexed",
+ "drv-draw_calls_fallback_count",
+ "drv-user_buffer_upload_bytes",
+ "drv-constbuf_upload_count",
+ "drv-constbuf_upload_bytes",
+ "drv-pushbuf_count",
+ "drv-resource_validate_count"
+};
+
+#endif /* NOUVEAU_ENABLE_DRIVER_STATISTICS */
+
+/* Destroy a software query; it owns no resources beyond its own struct. */
+static void
+nvc0_sw_destroy_query(struct nvc0_context *nvc0, struct nvc0_query *q)
+{
+ struct nvc0_sw_query *sq = nvc0_sw_query(q);
+ FREE(sq);
+}
+
+/* Snapshot the statistic's current value as the query baseline.
+ * The first five stats (the "drv-*_current_*" gauges, see the names table)
+ * use a zero baseline so end_query reports the instantaneous value; all
+ * others report the delta between begin and end. Always succeeds. */
+static boolean
+nvc0_sw_begin_query(struct nvc0_context *nvc0, struct nvc0_query *q)
+{
+#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
+ struct nvc0_sw_query *sq = nvc0_sw_query(q);
+
+ if (q->index >= 5) {
+ sq->value = nvc0->screen->base.stats.v[q->index];
+ } else {
+ sq->value = 0;
+ }
+#endif
+ return true;
+}
+
+/* Finalize the query: store the difference between the statistic's current
+ * value and the baseline captured at begin time. */
+static void
+nvc0_sw_end_query(struct nvc0_context *nvc0, struct nvc0_query *q)
+{
+#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
+ struct nvc0_sw_query *sq = nvc0_sw_query(q);
+ sq->value = nvc0->screen->base.stats.v[q->index] - sq->value;
+#endif
+}
+
+/* Return the computed statistic as a 64-bit value. Software queries are
+ * always immediately available, so wait is ignored and true is returned
+ * unconditionally (result is left untouched when stats are compiled out). */
+static boolean
+nvc0_sw_get_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
+ boolean wait, union pipe_query_result *result)
+{
+#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
+ struct nvc0_sw_query *sq = nvc0_sw_query(q);
+ uint64_t *res64 = (uint64_t *)result;
+
+ res64[0] = sq->value;
+#endif
+ return true;
+}
+
+static const struct nvc0_query_funcs sw_query_funcs = {
+ .destroy_query = nvc0_sw_destroy_query,
+ .begin_query = nvc0_sw_begin_query,
+ .end_query = nvc0_sw_end_query,
+ .get_query_result = nvc0_sw_get_query_result,
+};
+
+/* Create a software (driver statistics) query object.
+ * Returns NULL when the type is not a driver-statistic query or on OOM.
+ * The index parameter is unused; the stat slot is derived from the type.
+ * (Fixed: the context parameter was misspelled "nvcO" -- capital letter O
+ * instead of zero -- inconsistent with every sibling function here.) */
+struct nvc0_query *
+nvc0_sw_create_query(struct nvc0_context *nvc0, unsigned type, unsigned index)
+{
+ struct nvc0_sw_query *sq;
+ struct nvc0_query *q;
+
+ if (type < NVC0_SW_QUERY_DRV_STAT(0) || type > NVC0_SW_QUERY_DRV_STAT_LAST)
+ return NULL;
+
+ sq = CALLOC_STRUCT(nvc0_sw_query);
+ if (!sq)
+ return NULL;
+
+ q = &sq->base;
+ q->funcs = &sw_query_funcs;
+ q->type = type;
+ q->index = type - NVC0_SW_QUERY_DRV_STAT(0);
+
+ return q;
+}
+
+/* Enumerate software driver-statistics queries. With info == NULL, returns
+ * the number of stats (zero when statistics are compiled out, since the
+ * enum then only contains its COUNT terminator). Otherwise fills *info for
+ * stat id and returns 1, or 0 when id is out of range. */
+int
+nvc0_sw_get_driver_query_info(struct nvc0_screen *screen, unsigned id,
+ struct pipe_driver_query_info *info)
+{
+ int count = 0;
+
+ count += NVC0_SW_QUERY_DRV_STAT_COUNT;
+ if (!info)
+ return count;
+
+#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
+ if (id < count) {
+ info->name = nvc0_sw_query_drv_stat_names[id];
+ info->query_type = NVC0_SW_QUERY_DRV_STAT(id);
+ info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
+ info->max_value.u64 = 0;
+ /* byte-counting stats are tagged so frontends can pretty-print them */
+ if (strstr(info->name, "bytes"))
+ info->type = PIPE_DRIVER_QUERY_TYPE_BYTES;
+ info->group_id = NVC0_SW_QUERY_DRV_STAT_GROUP;
+ return 1;
+ }
+#endif
+ return 0;
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_sw.h b/src/gallium/drivers/nouveau/nvc0/nvc0_query_sw.h
new file mode 100644
index 00000000000..eaa890e4fc0
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_sw.h
@@ -0,0 +1,64 @@
+#ifndef __NVC0_QUERY_SW_H__
+#define __NVC0_QUERY_SW_H__
+
+#include "nvc0_query.h"
+
+/* A software (driver statistics) query; extends nvc0_query with the
+ * baseline/result value managed by begin/end_query. */
+struct nvc0_sw_query {
+ struct nvc0_query base;
+ uint64_t value;
+};
+
+/* Downcast helper; valid because base is the first member. */
+static inline struct nvc0_sw_query *
+nvc0_sw_query(struct nvc0_query *q)
+{
+ return (struct nvc0_sw_query *)q;
+}
+
+/*
+ * Driver statistics queries:
+ */
+#define NVC0_SW_QUERY_DRV_STAT(i) (PIPE_QUERY_DRIVER_SPECIFIC + 1024 + (i))
+#define NVC0_SW_QUERY_DRV_STAT_LAST NVC0_SW_QUERY_DRV_STAT(NVC0_SW_QUERY_DRV_STAT_COUNT - 1)
+enum nvc0_sw_query_drv_stat
+{
+#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
+ NVC0_SW_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_COUNT = 0,
+ NVC0_SW_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_BYTES,
+ NVC0_SW_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_COUNT,
+ NVC0_SW_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_VID,
+ NVC0_SW_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_SYS,
+ NVC0_SW_QUERY_DRV_STAT_TEX_TRANSFERS_READ,
+ NVC0_SW_QUERY_DRV_STAT_TEX_TRANSFERS_WRITE,
+ NVC0_SW_QUERY_DRV_STAT_TEX_COPY_COUNT,
+ NVC0_SW_QUERY_DRV_STAT_TEX_BLIT_COUNT,
+ NVC0_SW_QUERY_DRV_STAT_TEX_CACHE_FLUSH_COUNT,
+ NVC0_SW_QUERY_DRV_STAT_BUF_TRANSFERS_READ,
+ NVC0_SW_QUERY_DRV_STAT_BUF_TRANSFERS_WRITE,
+ NVC0_SW_QUERY_DRV_STAT_BUF_READ_BYTES_STAGING_VID,
+ NVC0_SW_QUERY_DRV_STAT_BUF_WRITE_BYTES_DIRECT,
+ NVC0_SW_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_VID,
+ NVC0_SW_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_SYS,
+ NVC0_SW_QUERY_DRV_STAT_BUF_COPY_BYTES,
+ NVC0_SW_QUERY_DRV_STAT_BUF_NON_KERNEL_FENCE_SYNC_COUNT,
+ NVC0_SW_QUERY_DRV_STAT_ANY_NON_KERNEL_FENCE_SYNC_COUNT,
+ NVC0_SW_QUERY_DRV_STAT_QUERY_SYNC_COUNT,
+ NVC0_SW_QUERY_DRV_STAT_GPU_SERIALIZE_COUNT,
+ NVC0_SW_QUERY_DRV_STAT_DRAW_CALLS_ARRAY,
+ NVC0_SW_QUERY_DRV_STAT_DRAW_CALLS_INDEXED,
+ NVC0_SW_QUERY_DRV_STAT_DRAW_CALLS_FALLBACK_COUNT,
+ NVC0_SW_QUERY_DRV_STAT_USER_BUFFER_UPLOAD_BYTES,
+ NVC0_SW_QUERY_DRV_STAT_CONSTBUF_UPLOAD_COUNT,
+ NVC0_SW_QUERY_DRV_STAT_CONSTBUF_UPLOAD_BYTES,
+ NVC0_SW_QUERY_DRV_STAT_PUSHBUF_COUNT,
+ NVC0_SW_QUERY_DRV_STAT_RESOURCE_VALIDATE_COUNT,
+#endif
+ NVC0_SW_QUERY_DRV_STAT_COUNT
+};
+
+struct nvc0_query *
+nvc0_sw_create_query(struct nvc0_context *, unsigned, unsigned);
+int
+nvc0_sw_get_driver_query_info(struct nvc0_screen *, unsigned,
+ struct pipe_driver_query_info *);
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index afd91e6feee..f34ad0ed5d1 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -561,12 +561,7 @@ nvc0_screen_init_compute(struct nvc0_screen *screen)
switch (screen->base.device->chipset & ~0xf) {
case 0xc0:
case 0xd0:
- /* Using COMPUTE has weird effects on 3D state, we need to
- * investigate this further before enabling it by default.
- */
- if (debug_get_bool_option("NVC0_COMPUTE", false))
- return nvc0_screen_compute_setup(screen, screen->base.pushbuf);
- return 0;
+ return nvc0_screen_compute_setup(screen, screen->base.pushbuf);
case 0xe0:
return nve4_screen_compute_setup(screen, screen->base.pushbuf);
case 0xf0:
@@ -914,6 +909,7 @@ nvc0_screen_create(struct nouveau_device *dev)
else
value = (16 << 8) | 4;
}
+ screen->gpc_count = value & 0x000000ff;
screen->mp_count = value >> 8;
screen->mp_count_compute = screen->mp_count;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
index f57a316f01e..857eb0316c7 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
@@ -67,6 +67,7 @@ struct nvc0_screen {
struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
struct nouveau_bo *poly_cache;
+ uint8_t gpc_count;
uint16_t mp_count;
uint16_t mp_count_compute; /* magic reg can make compute use fewer MPs */
@@ -94,7 +95,7 @@ struct nvc0_screen {
struct {
struct nvc0_program *prog; /* compute state object to read MP counters */
- struct pipe_query *mp_counter[8]; /* counter to query allocation */
+ struct nvc0_hw_sm_query *mp_counter[8]; /* counter to query allocation */
uint8_t num_hw_sm_active[2];
bool mp_counters_enabled;
} pm;
@@ -112,148 +113,6 @@ nvc0_screen(struct pipe_screen *screen)
return (struct nvc0_screen *)screen;
}
-/*
- * Performance counters groups:
- */
-#define NVC0_QUERY_MP_COUNTER_GROUP 0
-#define NVC0_QUERY_DRV_STAT_GROUP 1
-
-/* Performance counter queries:
- */
-#define NVE4_HW_SM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + (i))
-#define NVE4_HW_SM_QUERY_LAST NVE4_HW_SM_QUERY(NVE4_HW_SM_QUERY_COUNT - 1)
-enum nve4_pm_queries
-{
- NVE4_HW_SM_QUERY_ACTIVE_CYCLES = 0,
- NVE4_HW_SM_QUERY_ACTIVE_WARPS,
- NVE4_HW_SM_QUERY_ATOM_COUNT,
- NVE4_HW_SM_QUERY_BRANCH,
- NVE4_HW_SM_QUERY_DIVERGENT_BRANCH,
- NVE4_HW_SM_QUERY_GLD_REQUEST,
- NVE4_HW_SM_QUERY_GLD_MEM_DIV_REPLAY,
- NVE4_HW_SM_QUERY_GST_TRANSACTIONS,
- NVE4_HW_SM_QUERY_GST_MEM_DIV_REPLAY,
- NVE4_HW_SM_QUERY_GRED_COUNT,
- NVE4_HW_SM_QUERY_GST_REQUEST,
- NVE4_HW_SM_QUERY_INST_EXECUTED,
- NVE4_HW_SM_QUERY_INST_ISSUED,
- NVE4_HW_SM_QUERY_INST_ISSUED1,
- NVE4_HW_SM_QUERY_INST_ISSUED2,
- NVE4_HW_SM_QUERY_L1_GLD_HIT,
- NVE4_HW_SM_QUERY_L1_GLD_MISS,
- NVE4_HW_SM_QUERY_L1_LOCAL_LD_HIT,
- NVE4_HW_SM_QUERY_L1_LOCAL_LD_MISS,
- NVE4_HW_SM_QUERY_L1_LOCAL_ST_HIT,
- NVE4_HW_SM_QUERY_L1_LOCAL_ST_MISS,
- NVE4_HW_SM_QUERY_L1_SHARED_LD_TRANSACTIONS,
- NVE4_HW_SM_QUERY_L1_SHARED_ST_TRANSACTIONS,
- NVE4_HW_SM_QUERY_LOCAL_LD,
- NVE4_HW_SM_QUERY_LOCAL_LD_TRANSACTIONS,
- NVE4_HW_SM_QUERY_LOCAL_ST,
- NVE4_HW_SM_QUERY_LOCAL_ST_TRANSACTIONS,
- NVE4_HW_SM_QUERY_PROF_TRIGGER_0,
- NVE4_HW_SM_QUERY_PROF_TRIGGER_1,
- NVE4_HW_SM_QUERY_PROF_TRIGGER_2,
- NVE4_HW_SM_QUERY_PROF_TRIGGER_3,
- NVE4_HW_SM_QUERY_PROF_TRIGGER_4,
- NVE4_HW_SM_QUERY_PROF_TRIGGER_5,
- NVE4_HW_SM_QUERY_PROF_TRIGGER_6,
- NVE4_HW_SM_QUERY_PROF_TRIGGER_7,
- NVE4_HW_SM_QUERY_SHARED_LD,
- NVE4_HW_SM_QUERY_SHARED_LD_REPLAY,
- NVE4_HW_SM_QUERY_SHARED_ST,
- NVE4_HW_SM_QUERY_SHARED_ST_REPLAY,
- NVE4_HW_SM_QUERY_SM_CTA_LAUNCHED,
- NVE4_HW_SM_QUERY_THREADS_LAUNCHED,
- NVE4_HW_SM_QUERY_UNCACHED_GLD_TRANSACTIONS,
- NVE4_HW_SM_QUERY_WARPS_LAUNCHED,
- NVE4_HW_SM_QUERY_METRIC_IPC,
- NVE4_HW_SM_QUERY_METRIC_IPAC,
- NVE4_HW_SM_QUERY_METRIC_IPEC,
- NVE4_HW_SM_QUERY_METRIC_MP_OCCUPANCY,
- NVE4_HW_SM_QUERY_METRIC_MP_EFFICIENCY,
- NVE4_HW_SM_QUERY_METRIC_INST_REPLAY_OHEAD,
- NVE4_HW_SM_QUERY_COUNT
-};
-
-#define NVC0_HW_SM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + 2048 + (i))
-#define NVC0_HW_SM_QUERY_LAST NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_COUNT - 1)
-enum nvc0_pm_queries
-{
- NVC0_HW_SM_QUERY_ACTIVE_CYCLES = 0,
- NVC0_HW_SM_QUERY_ACTIVE_WARPS,
- NVC0_HW_SM_QUERY_ATOM_COUNT,
- NVC0_HW_SM_QUERY_BRANCH,
- NVC0_HW_SM_QUERY_DIVERGENT_BRANCH,
- NVC0_HW_SM_QUERY_GLD_REQUEST,
- NVC0_HW_SM_QUERY_GRED_COUNT,
- NVC0_HW_SM_QUERY_GST_REQUEST,
- NVC0_HW_SM_QUERY_INST_EXECUTED,
- NVC0_HW_SM_QUERY_INST_ISSUED1_0,
- NVC0_HW_SM_QUERY_INST_ISSUED1_1,
- NVC0_HW_SM_QUERY_INST_ISSUED2_0,
- NVC0_HW_SM_QUERY_INST_ISSUED2_1,
- NVC0_HW_SM_QUERY_LOCAL_LD,
- NVC0_HW_SM_QUERY_LOCAL_ST,
- NVC0_HW_SM_QUERY_PROF_TRIGGER_0,
- NVC0_HW_SM_QUERY_PROF_TRIGGER_1,
- NVC0_HW_SM_QUERY_PROF_TRIGGER_2,
- NVC0_HW_SM_QUERY_PROF_TRIGGER_3,
- NVC0_HW_SM_QUERY_PROF_TRIGGER_4,
- NVC0_HW_SM_QUERY_PROF_TRIGGER_5,
- NVC0_HW_SM_QUERY_PROF_TRIGGER_6,
- NVC0_HW_SM_QUERY_PROF_TRIGGER_7,
- NVC0_HW_SM_QUERY_SHARED_LD,
- NVC0_HW_SM_QUERY_SHARED_ST,
- NVC0_HW_SM_QUERY_THREADS_LAUNCHED,
- NVC0_HW_SM_QUERY_TH_INST_EXECUTED_0,
- NVC0_HW_SM_QUERY_TH_INST_EXECUTED_1,
- NVC0_HW_SM_QUERY_TH_INST_EXECUTED_2,
- NVC0_HW_SM_QUERY_TH_INST_EXECUTED_3,
- NVC0_HW_SM_QUERY_WARPS_LAUNCHED,
- NVC0_HW_SM_QUERY_COUNT
-};
-
-/* Driver statistics queries:
- */
-#define NVC0_QUERY_DRV_STAT(i) (PIPE_QUERY_DRIVER_SPECIFIC + 1024 + (i))
-#define NVC0_QUERY_DRV_STAT_LAST NVC0_QUERY_DRV_STAT(NVC0_QUERY_DRV_STAT_COUNT - 1)
-enum nvc0_drv_stats_queries
-{
-#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
- NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_COUNT = 0,
- NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_BYTES,
- NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_COUNT,
- NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_VID,
- NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_SYS,
- NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_READ,
- NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_WRITE,
- NVC0_QUERY_DRV_STAT_TEX_COPY_COUNT,
- NVC0_QUERY_DRV_STAT_TEX_BLIT_COUNT,
- NVC0_QUERY_DRV_STAT_TEX_CACHE_FLUSH_COUNT,
- NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_READ,
- NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_WRITE,
- NVC0_QUERY_DRV_STAT_BUF_READ_BYTES_STAGING_VID,
- NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_DIRECT,
- NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_VID,
- NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_SYS,
- NVC0_QUERY_DRV_STAT_BUF_COPY_BYTES,
- NVC0_QUERY_DRV_STAT_BUF_NON_KERNEL_FENCE_SYNC_COUNT,
- NVC0_QUERY_DRV_STAT_ANY_NON_KERNEL_FENCE_SYNC_COUNT,
- NVC0_QUERY_DRV_STAT_QUERY_SYNC_COUNT,
- NVC0_QUERY_DRV_STAT_GPU_SERIALIZE_COUNT,
- NVC0_QUERY_DRV_STAT_DRAW_CALLS_ARRAY,
- NVC0_QUERY_DRV_STAT_DRAW_CALLS_INDEXED,
- NVC0_QUERY_DRV_STAT_DRAW_CALLS_FALLBACK_COUNT,
- NVC0_QUERY_DRV_STAT_USER_BUFFER_UPLOAD_BYTES,
- NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_COUNT,
- NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_BYTES,
- NVC0_QUERY_DRV_STAT_PUSHBUF_COUNT,
- NVC0_QUERY_DRV_STAT_RESOURCE_VALIDATE_COUNT,
-#endif
- NVC0_QUERY_DRV_STAT_COUNT
-};
-
int nvc0_screen_get_driver_query_info(struct pipe_screen *, unsigned,
struct pipe_driver_query_info *);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
index 8f8ac2d34b9..af837fc4a33 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
@@ -26,6 +26,7 @@
#include "util/u_inlines.h"
#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_query_hw.h"
static inline void
nvc0_program_update_context_state(struct nvc0_context *nvc0,
@@ -272,14 +273,14 @@ nvc0_tfb_validate(struct nvc0_context *nvc0)
continue;
if (!targ->clean)
- nvc0_query_fifo_wait(push, targ->pq);
+ nvc0_hw_query_fifo_wait(push, nvc0_query(targ->pq));
BEGIN_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 5);
PUSH_DATA (push, 1);
PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset);
PUSH_DATA (push, buf->address + targ->pipe.buffer_offset);
PUSH_DATA (push, targ->pipe.buffer_size);
if (!targ->clean) {
- nvc0_query_pushbuf_submit(push, targ->pq, 0x4);
+ nvc0_hw_query_pushbuf_submit(push, nvc0_query(targ->pq), 0x4);
} else {
PUSH_DATA(push, 0); /* TFB_BUFFER_OFFSET */
targ->clean = false;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
index c5bfd03956d..742bef39247 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -29,6 +29,7 @@
#include "nvc0/nvc0_stateobj.h"
#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_query_hw.h"
#include "nvc0/nvc0_3d.xml.h"
#include "nv50/nv50_texture.xml.h"
@@ -1070,7 +1071,7 @@ nvc0_so_target_create(struct pipe_context *pipe,
if (!targ)
return NULL;
- targ->pq = pipe->create_query(pipe, NVC0_QUERY_TFB_BUFFER_OFFSET, 0);
+ targ->pq = pipe->create_query(pipe, NVC0_HW_QUERY_TFB_BUFFER_OFFSET, 0);
if (!targ->pq) {
FREE(targ);
return NULL;
@@ -1091,6 +1092,25 @@ nvc0_so_target_create(struct pipe_context *pipe,
}
static void
+nvc0_so_target_save_offset(struct pipe_context *pipe,
+ struct pipe_stream_output_target *ptarg,
+ unsigned index, bool *serialize)
+{
+ struct nvc0_so_target *targ = nvc0_so_target(ptarg);
+
+ if (*serialize) {
+ *serialize = false;
+ PUSH_SPACE(nvc0_context(pipe)->base.pushbuf, 1);
+ IMMED_NVC0(nvc0_context(pipe)->base.pushbuf, NVC0_3D(SERIALIZE), 0);
+
+ NOUVEAU_DRV_STAT(nouveau_screen(pipe->screen), gpu_serialize_count, 1);
+ }
+
+ nvc0_query(targ->pq)->index = index;
+ pipe->end_query(pipe, targ->pq);
+}
+
+static void
nvc0_so_target_destroy(struct pipe_context *pipe,
struct pipe_stream_output_target *ptarg)
{
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
index aaec60a5ac2..d459dd61c19 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
@@ -188,14 +188,10 @@ nvc0_m2mf_push_linear(struct nouveau_context *nv,
nouveau_pushbuf_validate(push);
while (count) {
- unsigned nr;
+ unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);
- if (!PUSH_SPACE(push, 16))
+ if (!PUSH_SPACE(push, nr + 9))
break;
- nr = PUSH_AVAIL(push);
- assert(nr >= 16);
- nr = MIN2(count, nr - 9);
- nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN);
BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2);
PUSH_DATAh(push, dst->offset + offset);
@@ -234,14 +230,10 @@ nve4_p2mf_push_linear(struct nouveau_context *nv,
nouveau_pushbuf_validate(push);
while (count) {
- unsigned nr;
+ unsigned nr = MIN2(count, (NV04_PFIFO_MAX_PACKET_LEN - 1));
- if (!PUSH_SPACE(push, 16))
+ if (!PUSH_SPACE(push, nr + 10))
break;
- nr = PUSH_AVAIL(push);
- assert(nr >= 16);
- nr = MIN2(count, nr - 8);
- nr = MIN2(nr, (NV04_PFIFO_MAX_PACKET_LEN - 1));
BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_DST_ADDRESS_HIGH), 2);
PUSH_DATAh(push, dst->offset + offset);
@@ -571,9 +563,7 @@ nvc0_cb_bo_push(struct nouveau_context *nv,
PUSH_DATA (push, bo->offset + base);
while (words) {
- unsigned nr = PUSH_AVAIL(push);
- nr = MIN2(nr, words);
- nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN - 1);
+ unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN - 1);
PUSH_SPACE(push, nr + 2);
PUSH_REFN (push, bo, NOUVEAU_BO_WR | domain);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
index 188c7d7cdc8..c464904d6d4 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -29,6 +29,7 @@
#include "translate/translate.h"
#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_query_hw.h"
#include "nvc0/nvc0_resource.h"
#include "nvc0/nvc0_3d.xml.h"
@@ -775,7 +776,7 @@ nvc0_draw_stream_output(struct nvc0_context *nvc0,
res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
PUSH_SPACE(push, 2);
IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
- nvc0_query_fifo_wait(push, so->pq);
+ nvc0_hw_query_fifo_wait(push, nvc0_query(so->pq));
if (nvc0->screen->eng3d->oclass < GM107_3D_CLASS)
IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0);
@@ -791,7 +792,7 @@ nvc0_draw_stream_output(struct nvc0_context *nvc0,
BEGIN_NVC0(push, NVC0_3D(DRAW_TFB_STRIDE), 1);
PUSH_DATA (push, so->stride);
BEGIN_NVC0(push, NVC0_3D(DRAW_TFB_BYTES), 1);
- nvc0_query_pushbuf_submit(push, so->pq, 0x4);
+ nvc0_hw_query_pushbuf_submit(push, nvc0_query(so->pq), 0x4);
IMMED_NVC0(push, NVC0_3D(VERTEX_END_GL), 0);
mode |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index efb4889e562..32ce76a9e07 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -305,7 +305,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_GLSL_FEATURE_LEVEL:
if (family >= CHIP_CEDAR)
- return 330;
+ return 410;
/* pre-evergreen geom shaders need newer kernel */
if (rscreen->b.info.drm_minor >= 37)
return 330;
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 1d905822cde..8efe902a329 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -166,8 +166,6 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
if (rctx->b.chip_class <= R700) {
use_sb &= (shader->shader.processor_type != TGSI_PROCESSOR_GEOMETRY);
}
- /* disable SB for shaders using CF_INDEX_0/1 (sampler/ubo array indexing) as it doesn't handle those currently */
- use_sb &= !shader->shader.uses_index_registers;
/* disable SB for shaders using doubles */
use_sb &= !shader->shader.uses_doubles;
@@ -1250,9 +1248,6 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx)
continue;
}
- if (ctx->src[i].kc_rel)
- ctx->shader->uses_index_registers = true;
-
if (ctx->src[i].rel) {
int chan = inst->Src[i].Indirect.Swizzle;
int treg = r600_get_temp(ctx);
@@ -1912,7 +1907,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
shader->uses_doubles = ctx.info.uses_doubles;
- indirect_gprs = ctx.info.indirect_files & ~(1 << TGSI_FILE_CONSTANT);
+ indirect_gprs = ctx.info.indirect_files & ~((1 << TGSI_FILE_CONSTANT) | (1 << TGSI_FILE_SAMPLER));
tgsi_parse_init(&ctx.parse, tokens);
ctx.type = ctx.info.processor;
shader->processor_type = ctx.type;
@@ -1936,7 +1931,6 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
ctx.gs_next_vertex = 0;
ctx.gs_stream_output_info = &so;
- shader->uses_index_registers = false;
ctx.face_gpr = -1;
ctx.fixed_pt_position_gpr = -1;
ctx.fragcoord_input = -1;
@@ -5703,8 +5697,6 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
sampler_src_reg = 3;
sampler_index_mode = inst->Src[sampler_src_reg].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
- if (sampler_index_mode)
- ctx->shader->uses_index_registers = true;
src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index 48de9cdb156..c240e7110c1 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -75,8 +75,6 @@ struct r600_shader {
boolean has_txq_cube_array_z_comp;
boolean uses_tex_buffers;
boolean gs_prim_id_input;
- /* Temporarily workaround SB not handling CF_INDEX_[01] index registers */
- boolean uses_index_registers;
/* Size in bytes of a data item in the ring(s) (single vertex data).
Stages with only one ring items 123 will be set to 0. */
diff --git a/src/gallium/drivers/r600/r600_uvd.c b/src/gallium/drivers/r600/r600_uvd.c
index 357e9017a65..e2e9033ea2c 100644
--- a/src/gallium/drivers/r600/r600_uvd.c
+++ b/src/gallium/drivers/r600/r600_uvd.c
@@ -47,8 +47,11 @@
#include "r600_pipe.h"
#include "radeon/radeon_video.h"
#include "radeon/radeon_uvd.h"
+#include "radeon/radeon_vce.h"
#include "r600d.h"
+#define R600_UVD_ENABLE_TILING 0
+
/**
* creates an video buffer with an UVD compatible memory layout
*/
@@ -77,7 +80,7 @@ struct pipe_video_buffer *r600_video_buffer_create(struct pipe_context *pipe,
template.height = align(tmpl->height / array_size, VL_MACROBLOCK_HEIGHT);
vl_video_buffer_template(&templ, &template, resource_formats[0], 1, array_size, PIPE_USAGE_DEFAULT, 0);
- if (ctx->b.chip_class < EVERGREEN || tmpl->interlaced)
+ if (ctx->b.chip_class < EVERGREEN || tmpl->interlaced || !R600_UVD_ENABLE_TILING)
templ.bind = PIPE_BIND_LINEAR;
resources[0] = (struct r600_texture *)
pipe->screen->resource_create(pipe->screen, &templ);
@@ -86,7 +89,7 @@ struct pipe_video_buffer *r600_video_buffer_create(struct pipe_context *pipe,
if (resource_formats[1] != PIPE_FORMAT_NONE) {
vl_video_buffer_template(&templ, &template, resource_formats[1], 1, array_size, PIPE_USAGE_DEFAULT, 1);
- if (ctx->b.chip_class < EVERGREEN || tmpl->interlaced)
+ if (ctx->b.chip_class < EVERGREEN || tmpl->interlaced || !R600_UVD_ENABLE_TILING)
templ.bind = PIPE_BIND_LINEAR;
resources[1] = (struct r600_texture *)
pipe->screen->resource_create(pipe->screen, &templ);
@@ -96,7 +99,7 @@ struct pipe_video_buffer *r600_video_buffer_create(struct pipe_context *pipe,
if (resource_formats[2] != PIPE_FORMAT_NONE) {
vl_video_buffer_template(&templ, &template, resource_formats[2], 1, array_size, PIPE_USAGE_DEFAULT, 2);
- if (ctx->b.chip_class < EVERGREEN || tmpl->interlaced)
+ if (ctx->b.chip_class < EVERGREEN || tmpl->interlaced || !R600_UVD_ENABLE_TILING)
templ.bind = PIPE_BIND_LINEAR;
resources[2] = (struct r600_texture *)
pipe->screen->resource_create(pipe->screen, &templ);
@@ -166,9 +169,28 @@ static struct radeon_winsys_cs_handle* r600_uvd_set_dtb(struct ruvd_msg *msg, st
return luma->resource.cs_buf;
}
+/* get the radeon resources for VCE */
+static void r600_vce_get_buffer(struct pipe_resource *resource,
+ struct radeon_winsys_cs_handle **handle,
+ struct radeon_surf **surface)
+{
+ struct r600_texture *res = (struct r600_texture *)resource;
+
+ if (handle)
+ *handle = res->resource.cs_buf;
+
+ if (surface)
+ *surface = &res->surface;
+}
+
/* create decoder */
struct pipe_video_codec *r600_uvd_create_decoder(struct pipe_context *context,
- const struct pipe_video_codec *templat)
+ const struct pipe_video_codec *templat)
{
+ struct r600_context *ctx = (struct r600_context *)context;
+
+ if (templat->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE)
+ return rvce_create_encoder(context, templat, ctx->b.ws, r600_vce_get_buffer);
+
return ruvd_create_decoder(context, templat, r600_uvd_set_dtb);
}
diff --git a/src/gallium/drivers/r600/sb/sb_bc.h b/src/gallium/drivers/r600/sb/sb_bc.h
index ab988f8716d..9c2a9170436 100644
--- a/src/gallium/drivers/r600/sb/sb_bc.h
+++ b/src/gallium/drivers/r600/sb/sb_bc.h
@@ -48,6 +48,7 @@ class fetch_node;
class alu_group_node;
class region_node;
class shader;
+class value;
class sb_ostream {
public:
@@ -477,7 +478,9 @@ struct bc_cf {
bool is_alu_extended() {
assert(op_ptr->flags & CF_ALU);
- return kc[2].mode != KC_LOCK_NONE || kc[3].mode != KC_LOCK_NONE;
+ return kc[2].mode != KC_LOCK_NONE || kc[3].mode != KC_LOCK_NONE ||
+ kc[0].index_mode != KC_INDEX_NONE || kc[1].index_mode != KC_INDEX_NONE ||
+ kc[2].index_mode != KC_INDEX_NONE || kc[3].index_mode != KC_INDEX_NONE;
}
};
@@ -818,13 +821,16 @@ class bc_parser {
bool gpr_reladdr;
+ // Note: currently relies on input emitting SET_CF in same basic block as uses
+ value *cf_index_value[2];
+ alu_node *mova;
public:
bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader) :
ctx(sctx), dec(), bc(bc), pshader(pshader),
dw(), bc_ndw(), max_cf(),
sh(), error(), slots(), cgroup(),
- cf_map(), loop_stack(), gpr_reladdr() { }
+ cf_map(), loop_stack(), gpr_reladdr(), cf_index_value(), mova() { }
int decode();
int prepare();
@@ -852,6 +858,10 @@ private:
int prepare_loop(cf_node *c);
int prepare_if(cf_node *c);
+ void save_set_cf_index(value *val, unsigned idx);
+ value *get_cf_index_value(unsigned idx);
+ void save_mova(alu_node *mova);
+ alu_node *get_mova();
};
diff --git a/src/gallium/drivers/r600/sb/sb_bc_dump.cpp b/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
index 0fc73c419a6..3c70ea7cd3d 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
@@ -27,6 +27,7 @@
#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"
+#include "eg_sq.h" // V_SQ_CF_INDEX_0/1
namespace r600_sb {
@@ -354,6 +355,14 @@ void bc_dump::dump(alu_node& n) {
s << " " << vec_bs[n.bc.bank_swizzle];
}
+ if (ctx.is_cayman()) {
+ if (n.bc.op == ALU_OP1_MOVA_INT) {
+ static const char *mova_str[] = { " AR_X", " PC", " CF_IDX0", " CF_IDX1",
+ " Unknown MOVA_INT dest" };
+ s << mova_str[std::min(n.bc.dst_gpr, 4u)]; // CM_V_SQ_MOVA_DST_AR_*
+ }
+ }
+
sblog << s.str() << "\n";
}
@@ -450,9 +459,9 @@ void bc_dump::dump(fetch_node& n) {
if (n.bc.fetch_whole_quad)
s << " FWQ";
if (ctx.is_egcm() && n.bc.resource_index_mode)
- s << " RIM:SQ_CF_INDEX_" << n.bc.resource_index_mode;
+ s << " RIM:SQ_CF_INDEX_" << (n.bc.resource_index_mode - V_SQ_CF_INDEX_0);
if (ctx.is_egcm() && n.bc.sampler_index_mode)
- s << " SID:SQ_CF_INDEX_" << n.bc.sampler_index_mode;
+ s << " SID:SQ_CF_INDEX_" << (n.bc.sampler_index_mode - V_SQ_CF_INDEX_0);
s << " UCF:" << n.bc.use_const_fields
<< " FMT(DTA:" << n.bc.data_format
@@ -470,9 +479,9 @@ void bc_dump::dump(fetch_node& n) {
if (n.bc.offset[k])
s << " O" << chans[k] << ":" << n.bc.offset[k];
if (ctx.is_egcm() && n.bc.resource_index_mode)
- s << " RIM:SQ_CF_INDEX_" << n.bc.resource_index_mode;
+ s << " RIM:SQ_CF_INDEX_" << (n.bc.resource_index_mode - V_SQ_CF_INDEX_0);
if (ctx.is_egcm() && n.bc.sampler_index_mode)
- s << " SID:SQ_CF_INDEX_" << n.bc.sampler_index_mode;
+ s << " SID:SQ_CF_INDEX_" << (n.bc.sampler_index_mode - V_SQ_CF_INDEX_0);
}
sblog << s.str() << "\n";
diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
index 522ff9d956e..82826a90921 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
@@ -303,7 +303,8 @@ void bc_finalizer::finalize_alu_group(alu_group_node* g, node *prev_node) {
assert(fdst.chan() == slot || slot == SLOT_TRANS);
}
- n->bc.dst_gpr = fdst.sel();
+ if (!(n->bc.op_ptr->flags & AF_MOVA && ctx.is_cayman()))
+ n->bc.dst_gpr = fdst.sel();
n->bc.dst_chan = d ? fdst.chan() : slot < SLOT_TRANS ? slot : 0;
@@ -514,7 +515,7 @@ void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg
void bc_finalizer::emit_set_grad(fetch_node* f) {
- assert(f->src.size() == 12);
+ assert(f->src.size() == 12 || f->src.size() == 13);
unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H };
unsigned arg_start = 0;
@@ -809,8 +810,8 @@ void bc_finalizer::finalize_cf(cf_node* c) {
}
sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) {
- unsigned sel = v->select.sel();
- unsigned bank = sel >> 12;
+ unsigned sel = v->select.kcache_sel();
+ unsigned bank = v->select.kcache_bank();
unsigned chan = v->select.chan();
static const unsigned kc_base[] = {128, 160, 256, 288};
diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
index 19bd0784a61..28ebfa2ce62 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
@@ -34,6 +34,7 @@
#include "r600_pipe.h"
#include "r600_shader.h"
+#include "eg_sq.h" // CM_V_SQ_MOVA_DST_CF_IDX0/1
#include <stack>
@@ -121,7 +122,7 @@ int bc_parser::parse_decls() {
return 0;
}
- if (pshader->indirect_files & ~(1 << TGSI_FILE_CONSTANT)) {
+ if (pshader->indirect_files & ~((1 << TGSI_FILE_CONSTANT) | (1 << TGSI_FILE_SAMPLER))) {
assert(pshader->num_arrays);
@@ -328,6 +329,29 @@ int bc_parser::prepare_alu_clause(cf_node* cf) {
return 0;
}
+void bc_parser::save_set_cf_index(value *val, unsigned idx)
+{
+ assert(idx <= 1);
+ assert(val);
+ cf_index_value[idx] = val;
+}
+value *bc_parser::get_cf_index_value(unsigned idx)
+{
+ assert(idx <= 1);
+ assert(cf_index_value[idx]);
+ return cf_index_value[idx];
+}
+void bc_parser::save_mova(alu_node *mova)
+{
+ assert(mova);
+ this->mova = mova;
+}
+alu_node *bc_parser::get_mova()
+{
+ assert(mova);
+ return mova;
+}
+
int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {
alu_node *n;
@@ -338,6 +362,7 @@ int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {
for (node_iterator I = g->begin(), E = g->end();
I != E; ++I) {
n = static_cast<alu_node*>(*I);
+ bool ubo_indexing[2] = {};
if (!sh->assign_slot(n, slots[cgroup])) {
assert(!"alu slot assignment failed");
@@ -375,9 +400,14 @@ int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {
n->dst.resize(1);
}
- if (flags & AF_MOVA) {
+ if (n->bc.op == ALU_OP0_SET_CF_IDX0 || n->bc.op == ALU_OP0_SET_CF_IDX1) {
+ // Move CF_IDX value into tex instruction operands, scheduler will later re-emit setting of CF_IDX
+ // DCE will kill this op
+ save_set_cf_index(get_mova()->src[0], n->bc.op == ALU_OP0_SET_CF_IDX1);
+ } else if (flags & AF_MOVA) {
n->dst[0] = sh->get_special_value(SV_AR_INDEX);
+ save_mova(n);
n->flags |= NF_DONT_HOIST;
@@ -432,7 +462,12 @@ int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {
bc_kcache &kc = cf->bc.kc[kc_set];
kc_addr = (kc.addr << 4) + (sel & 0x1F);
- n->src[s] = sh->get_kcache_value(kc.bank, kc_addr, src.chan);
+ n->src[s] = sh->get_kcache_value(kc.bank, kc_addr, src.chan, (alu_kcache_index_mode)kc.index_mode);
+
+ if (kc.index_mode != KC_INDEX_NONE) {
+ assert(kc.index_mode != KC_LOCK_LOOP);
+ ubo_indexing[kc.index_mode - KC_INDEX_0] = true;
+ }
} else if (src.sel < MAX_GPR) {
value *v = sh->get_gpr_value(true, src.sel, src.chan, src.rel);
@@ -469,6 +504,19 @@ int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {
}
}
}
+
+ // add UBO index values if any as dependencies
+ if (ubo_indexing[0]) {
+ n->src.push_back(get_cf_index_value(0));
+ }
+ if (ubo_indexing[1]) {
+ n->src.push_back(get_cf_index_value(1));
+ }
+
+ if ((n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX0 || n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1) &&
+ ctx.is_cayman())
+ // Move CF_IDX value into tex instruction operands, scheduler will later re-emit setting of CF_IDX
+ save_set_cf_index(n->src[0], n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1);
}
// pack multislot instructions into alu_packed_node
@@ -608,6 +656,13 @@ int bc_parser::prepare_fetch_clause(cf_node *cf) {
n->bc.src_sel[s], false);
}
+ // Scheduler will emit the appropriate instructions to set CF_IDX0/1
+ if (n->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE) {
+ n->src.push_back(get_cf_index_value(n->bc.sampler_index_mode == V_SQ_CF_INDEX_1));
+ }
+ if (n->bc.resource_index_mode != V_SQ_CF_INDEX_NONE) {
+ n->src.push_back(get_cf_index_value(n->bc.resource_index_mode == V_SQ_CF_INDEX_1));
+ }
}
}
diff --git a/src/gallium/drivers/r600/sb/sb_expr.cpp b/src/gallium/drivers/r600/sb/sb_expr.cpp
index 9c2274e65a3..556a05da395 100644
--- a/src/gallium/drivers/r600/sb/sb_expr.cpp
+++ b/src/gallium/drivers/r600/sb/sb_expr.cpp
@@ -403,7 +403,8 @@ bool expr_handler::fold_alu_op1(alu_node& n) {
if ((n.bc.op == ALU_OP1_MOV || n.bc.op == ALU_OP1_MOVA_INT ||
n.bc.op == ALU_OP1_MOVA_GPR_INT)
&& n.bc.clamp == 0 && n.bc.omod == 0
- && n.bc.src[0].abs == 0 && n.bc.src[0].neg == 0) {
+ && n.bc.src[0].abs == 0 && n.bc.src[0].neg == 0 &&
+ n.src.size() == 1 /* RIM/SIM can be appended as additional values */) {
assign_source(n.dst[0], v0);
return true;
}
diff --git a/src/gallium/drivers/r600/sb/sb_gcm.cpp b/src/gallium/drivers/r600/sb/sb_gcm.cpp
index bccb6713967..236b2ea0031 100644
--- a/src/gallium/drivers/r600/sb/sb_gcm.cpp
+++ b/src/gallium/drivers/r600/sb/sb_gcm.cpp
@@ -37,6 +37,7 @@
#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"
+#include "eg_sq.h" // V_SQ_CF_INDEX_NONE
namespace r600_sb {
@@ -406,6 +407,14 @@ void gcm::bu_sched_bb(bb_node* bb) {
ncnt = 3;
}
+ bool sampler_indexing = false;
+ if (n->is_fetch_inst() &&
+ static_cast<fetch_node *>(n)->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE)
+ {
+ sampler_indexing = true; // Give sampler indexed ops get their own clause
+ ncnt = sh.get_ctx().is_cayman() ? 2 : 3; // MOVA + SET_CF_IDX0/1
+ }
+
if ((sq == SQ_TEX || sq == SQ_VTX) &&
((last_count >= ctx.max_fetch/2 &&
check_alu_ready_count(24)) ||
@@ -418,7 +427,7 @@ void gcm::bu_sched_bb(bb_node* bb) {
bu_ready[sq].pop_front();
if (sq != SQ_CF) {
- if (!clause) {
+ if (!clause || sampler_indexing) {
clause = sh.create_clause(sq == SQ_ALU ?
NST_ALU_CLAUSE :
sq == SQ_TEX ? NST_TEX_CLAUSE :
diff --git a/src/gallium/drivers/r600/sb/sb_ir.h b/src/gallium/drivers/r600/sb/sb_ir.h
index 560a4a9b284..c612e6c4ec6 100644
--- a/src/gallium/drivers/r600/sb/sb_ir.h
+++ b/src/gallium/drivers/r600/sb/sb_ir.h
@@ -62,6 +62,13 @@ struct sel_chan
static unsigned sel(unsigned idx) { return (idx-1) >> 2; }
static unsigned chan(unsigned idx) { return (idx-1) & 3; }
+
+ sel_chan(unsigned bank, unsigned index,
+ unsigned chan, alu_kcache_index_mode index_mode)
+ : id(sel_chan((bank << 12) | index | ((unsigned)index_mode << 28), chan).id) {}
+ unsigned kcache_index_mode() const { return sel() >> 28; }
+ unsigned kcache_sel() const { return sel() & 0x0fffffffu; }
+ unsigned kcache_bank() const { return kcache_sel() >> 12; }
};
inline sb_ostream& operator <<(sb_ostream& o, sel_chan r) {
diff --git a/src/gallium/drivers/r600/sb/sb_sched.cpp b/src/gallium/drivers/r600/sb/sb_sched.cpp
index c98b8fff764..5113b756847 100644
--- a/src/gallium/drivers/r600/sb/sb_sched.cpp
+++ b/src/gallium/drivers/r600/sb/sb_sched.cpp
@@ -36,6 +36,7 @@
#include "sb_shader.h"
#include "sb_pass.h"
#include "sb_sched.h"
+#include "eg_sq.h" // V_SQ_CF_INDEX_NONE/0/1
namespace r600_sb {
@@ -781,7 +782,14 @@ void post_scheduler::schedule_bb(bb_node* bb) {
sblog << "\n";
);
- if (n->subtype == NST_ALU_CLAUSE) {
+ // May require emitting ALU ops to load index registers
+ if (n->is_fetch_clause()) {
+ n->remove();
+ process_fetch(static_cast<container_node *>(n));
+ continue;
+ }
+
+ if (n->is_alu_clause()) {
n->remove();
process_alu(static_cast<container_node*>(n));
continue;
@@ -823,6 +831,108 @@ void post_scheduler::init_regmap() {
}
}
+static alu_node *create_set_idx(shader &sh, unsigned ar_idx) {
+ alu_node *a = sh.create_alu();
+
+ assert(ar_idx == V_SQ_CF_INDEX_0 || ar_idx == V_SQ_CF_INDEX_1);
+ if (ar_idx == V_SQ_CF_INDEX_0)
+ a->bc.set_op(ALU_OP0_SET_CF_IDX0);
+ else
+ a->bc.set_op(ALU_OP0_SET_CF_IDX1);
+ a->bc.slot = SLOT_X;
+ a->dst.resize(1); // Dummy needed for recolor
+
+ PSC_DUMP(
+ sblog << "created IDX load: ";
+ dump::dump_op(a);
+ sblog << "\n";
+ );
+
+ return a;
+}
+
+void post_scheduler::load_index_register(value *v, unsigned ar_idx)
+{
+ alu.reset();
+
+ if (!sh.get_ctx().is_cayman()) {
+ // Evergreen has to first load address register, then use CF_SET_IDX0/1
+ alu_group_tracker &rt = alu.grp();
+ alu_node *set_idx = create_set_idx(sh, ar_idx);
+ if (!rt.try_reserve(set_idx)) {
+ sblog << "can't emit SET_CF_IDX";
+ dump::dump_op(set_idx);
+ sblog << "\n";
+ }
+ process_group();
+
+ if (!alu.check_clause_limits()) {
+ // Can't happen since clause only contains MOVA/CF_SET_IDX0/1
+ }
+ alu.emit_group();
+ }
+
+ alu_group_tracker &rt = alu.grp();
+ alu_node *a = alu.create_ar_load(v, ar_idx == V_SQ_CF_INDEX_1 ? SEL_Z : SEL_Y);
+
+ if (!rt.try_reserve(a)) {
+ sblog << "can't emit AR load : ";
+ dump::dump_op(a);
+ sblog << "\n";
+ }
+
+ process_group();
+
+ if (!alu.check_clause_limits()) {
+ // Can't happen since clause only contains MOVA/CF_SET_IDX0/1
+ }
+
+ alu.emit_group();
+ alu.emit_clause(cur_bb);
+}
+
+void post_scheduler::process_fetch(container_node *c) {
+ if (c->empty())
+ return;
+
+ for (node_iterator N, I = c->begin(), E = c->end(); I != E; I = N) {
+ N = I;
+ ++N;
+
+ node *n = *I;
+
+ fetch_node *f = static_cast<fetch_node*>(n);
+
+ PSC_DUMP(
+ sblog << "process_tex ";
+ dump::dump_op(n);
+ sblog << " ";
+ );
+
+ // TODO: If same values used can avoid reloading index register
+ if (f->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE ||
+ f->bc.resource_index_mode != V_SQ_CF_INDEX_NONE) {
+ unsigned index_mode = f->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE ?
+ f->bc.sampler_index_mode : f->bc.resource_index_mode;
+
+ // Currently require prior opt passes to use one TEX per indexed op
+ assert(f->parent->count() == 1);
+
+ value *v = f->src.back(); // Last src is index offset
+ assert(v);
+
+ cur_bb->push_front(c);
+
+ load_index_register(v, index_mode);
+ f->src.pop_back(); // Don't need index value any more
+
+ return;
+ }
+ }
+
+ cur_bb->push_front(c);
+}
+
void post_scheduler::process_alu(container_node *c) {
if (c->empty())
@@ -855,6 +965,7 @@ void post_scheduler::process_alu(container_node *c) {
if (uc) {
n->remove();
+
pending.push_back(n);
PSC_DUMP( sblog << "pending\n"; );
} else {
@@ -997,6 +1108,18 @@ void post_scheduler::init_globals(val_set &s, bool prealloc) {
}
}
+void post_scheduler::emit_index_registers() {
+ for (unsigned i = 0; i < 2; i++) {
+ if (alu.current_idx[i]) {
+ regmap = prev_regmap;
+ alu.discard_current_group();
+
+ load_index_register(alu.current_idx[i], KC_INDEX_0 + i);
+ alu.current_idx[i] = NULL;
+ }
+ }
+}
+
void post_scheduler::emit_clause() {
if (alu.current_ar) {
@@ -1005,7 +1128,11 @@ void post_scheduler::emit_clause() {
alu.emit_group();
}
- alu.emit_clause(cur_bb);
+ if (!alu.is_empty()) {
+ alu.emit_clause(cur_bb);
+ }
+
+ emit_index_registers();
}
void post_scheduler::schedule_alu(container_node *c) {
@@ -1017,6 +1144,14 @@ void post_scheduler::schedule_alu(container_node *c) {
prev_regmap = regmap;
if (!prepare_alu_group()) {
+ if (alu.current_idx[0] || alu.current_idx[1]) {
+ regmap = prev_regmap;
+ emit_clause();
+ init_globals(live, false);
+
+ continue;
+ }
+
if (alu.current_ar) {
emit_load_ar();
continue;
@@ -1028,6 +1163,7 @@ void post_scheduler::schedule_alu(container_node *c) {
regmap = prev_regmap;
emit_clause();
init_globals(live, false);
+
continue;
}
@@ -1180,7 +1316,7 @@ void post_scheduler::emit_load_ar() {
alu.discard_current_group();
alu_group_tracker &rt = alu.grp();
- alu_node *a = alu.create_ar_load();
+ alu_node *a = alu.create_ar_load(alu.current_ar, SEL_X);
if (!rt.try_reserve(a)) {
sblog << "can't emit AR load : ";
@@ -1287,6 +1423,42 @@ bool post_scheduler::map_src_val(value *v) {
}
bool post_scheduler::map_src_vec(vvec &vv, bool src) {
+ if (src) {
+ // Handle possible UBO indexing
+ bool ubo_indexing[2] = { false, false };
+ for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
+ value *v = *I;
+ if (!v)
+ continue;
+
+ if (v->is_kcache()) {
+ unsigned index_mode = v->select.kcache_index_mode();
+ if (index_mode == KC_INDEX_0 || index_mode == KC_INDEX_1) {
+ ubo_indexing[index_mode - KC_INDEX_0] = true;
+ }
+ }
+ }
+
+ // idx values stored at end of src vec, see bc_parser::prepare_alu_group
+ for (unsigned i = 2; i != 0; i--) {
+ if (ubo_indexing[i-1]) {
+ // TODO: skip adding value to kcache reservation somehow, causes
+ // unnecessary group breaks and cache line locks
+ value *v = vv.back();
+ if (alu.current_idx[i-1] && alu.current_idx[i-1] != v) {
+ PSC_DUMP(
+ sblog << "IDX" << i-1 << " already set to " <<
+ *alu.current_idx[i-1] << ", trying to set " << *v << "\n";
+ );
+ return false;
+ }
+
+ alu.current_idx[i-1] = v;
+ PSC_DUMP(sblog << "IDX" << i-1 << " set to " << *v << "\n";);
+ }
+ }
+ }
+
for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
value *v = *I;
if (!v)
@@ -1352,6 +1524,10 @@ void post_scheduler::dump_regmap() {
sblog << " current_AR: " << *alu.current_ar << "\n";
if (alu.current_pr)
sblog << " current_PR: " << *alu.current_pr << "\n";
+ if (alu.current_idx[0])
+ sblog << " current IDX0: " << *alu.current_idx[0] << "\n";
+ if (alu.current_idx[1])
+ sblog << " current IDX1: " << *alu.current_idx[1] << "\n";
}
void post_scheduler::recolor_locals() {
@@ -1441,6 +1617,13 @@ unsigned post_scheduler::try_add_instruction(node *n) {
unsigned avail_slots = rt.avail_slots();
+ // Cannot schedule in same clause as instructions using this index value
+ if (!n->dst.empty() && n->dst[0] &&
+ (n->dst[0] == alu.current_idx[0] || n->dst[0] == alu.current_idx[1])) {
+ PSC_DUMP(sblog << " CF_IDX source: " << *n->dst[0] << "\n";);
+ return 0;
+ }
+
if (n->is_alu_packed()) {
alu_packed_node *p = static_cast<alu_packed_node*>(n);
unsigned slots = p->get_slot_mask();
@@ -1770,7 +1953,7 @@ alu_clause_tracker::alu_clause_tracker(shader &sh)
grp0(sh), grp1(sh),
group(), clause(),
push_exec_mask(),
- current_ar(), current_pr() {}
+ current_ar(), current_pr(), current_idx() {}
void alu_clause_tracker::emit_group() {
@@ -1827,6 +2010,8 @@ bool alu_clause_tracker::check_clause_limits() {
// reserving slots to load AR and PR values
unsigned reserve_slots = (current_ar ? 1 : 0) + (current_pr ? 1 : 0);
+ // ...and index registers
+ reserve_slots += (current_idx[0] != NULL) + (current_idx[1] != NULL);
if (slot_count + slots > MAX_ALU_SLOTS - reserve_slots)
return false;
@@ -1892,13 +2077,15 @@ unsigned rp_kcache_tracker::get_lines(kc_lines& lines) {
unsigned cnt = 0;
for (unsigned i = 0; i < sel_count; ++i) {
- unsigned line = rp[i];
+ unsigned line = rp[i] & 0x1fffffffu;
+ unsigned index_mode = rp[i] >> 29;
if (!line)
return cnt;
--line;
line = (sel_count == 2) ? line >> 5 : line >> 6;
+ line |= index_mode << 29;
if (lines.insert(line).second)
++cnt;
@@ -1913,14 +2100,18 @@ bool alu_kcache_tracker::update_kc() {
memcpy(old_kc, kc, sizeof(kc));
for (kc_lines::iterator I = lines.begin(), E = lines.end(); I != E; ++I) {
- unsigned line = *I;
+ unsigned index_mode = *I >> 29;
+ unsigned line = *I & 0x1fffffffu;
unsigned bank = line >> 8;
+ assert(index_mode <= KC_INDEX_INVALID);
line &= 0xFF;
- if (c && (bank == kc[c-1].bank) && (kc[c-1].addr + 1 == line))
- ++kc[c-1].mode;
- else {
+ if (c && (bank == kc[c-1].bank) && (kc[c-1].addr + 1 == line) &&
+ kc[c-1].index_mode == index_mode)
+ {
+ kc[c-1].mode = KC_LOCK_2;
+ } else {
if (c == max_kcs) {
memcpy(kc, old_kc, sizeof(kc));
return false;
@@ -1930,17 +2121,16 @@ bool alu_kcache_tracker::update_kc() {
kc[c].bank = bank;
kc[c].addr = line;
+ kc[c].index_mode = index_mode;
++c;
}
}
return true;
}
-alu_node* alu_clause_tracker::create_ar_load() {
+alu_node* alu_clause_tracker::create_ar_load(value *v, chan_select ar_channel) {
alu_node *a = sh.create_alu();
- // FIXME use MOVA_GPR on R6xx
-
if (sh.get_ctx().uses_mova_gpr) {
a->bc.set_op(ALU_OP1_MOVA_GPR_INT);
a->bc.slot = SLOT_TRANS;
@@ -1948,9 +2138,13 @@ alu_node* alu_clause_tracker::create_ar_load() {
a->bc.set_op(ALU_OP1_MOVA_INT);
a->bc.slot = SLOT_X;
}
+ a->bc.dst_chan = ar_channel;
+ if (ar_channel != SEL_X && sh.get_ctx().is_cayman()) {
+ a->bc.dst_gpr = ar_channel == SEL_Y ? CM_V_SQ_MOVA_DST_CF_IDX0 : CM_V_SQ_MOVA_DST_CF_IDX1;
+ }
a->dst.resize(1);
- a->src.push_back(current_ar);
+ a->src.push_back(v);
PSC_DUMP(
sblog << "created AR load: ";
diff --git a/src/gallium/drivers/r600/sb/sb_sched.h b/src/gallium/drivers/r600/sb/sb_sched.h
index 87c45867e16..05b428ca884 100644
--- a/src/gallium/drivers/r600/sb/sb_sched.h
+++ b/src/gallium/drivers/r600/sb/sb_sched.h
@@ -66,6 +66,7 @@ public:
class literal_tracker {
literal lt[4];
unsigned uc[4];
+
public:
literal_tracker() : lt(), uc() {}
@@ -219,6 +220,8 @@ public:
// bottom-up)
value *current_ar;
value *current_pr;
+ // current values of CF_IDX registers that need preloading
+ value *current_idx[2];
alu_clause_tracker(shader &sh);
@@ -235,7 +238,7 @@ public:
void new_group();
bool is_empty();
- alu_node* create_ar_load();
+ alu_node* create_ar_load(value *v, chan_select ar_channel);
void discard_current_group();
@@ -256,6 +259,7 @@ class post_scheduler : public pass {
val_set cleared_interf;
+ void emit_index_registers();
public:
post_scheduler(shader &sh) : pass(sh),
@@ -266,6 +270,9 @@ public:
void run_on(container_node *n);
void schedule_bb(bb_node *bb);
+ void load_index_register(value *v, unsigned idx);
+ void process_fetch(container_node *c);
+
void process_alu(container_node *c);
void schedule_alu(container_node *c);
bool prepare_alu_group();
diff --git a/src/gallium/drivers/r600/sb/sb_shader.cpp b/src/gallium/drivers/r600/sb/sb_shader.cpp
index f996c0786d1..87e28e98157 100644
--- a/src/gallium/drivers/r600/sb/sb_shader.cpp
+++ b/src/gallium/drivers/r600/sb/sb_shader.cpp
@@ -188,9 +188,9 @@ value* shader::create_temp_value() {
return get_value(VLK_TEMP, id, 0);
}
-value* shader::get_kcache_value(unsigned bank, unsigned index, unsigned chan) {
+value* shader::get_kcache_value(unsigned bank, unsigned index, unsigned chan, alu_kcache_index_mode index_mode) {
return get_ro_value(kcache_values, VLK_KCACHE,
- sel_chan((bank << 12) | index, chan));
+ sel_chan(bank, index, chan, index_mode));
}
void shader::add_input(unsigned gpr, bool preloaded, unsigned comp_mask) {
diff --git a/src/gallium/drivers/r600/sb/sb_shader.h b/src/gallium/drivers/r600/sb/sb_shader.h
index 7955bba9b67..70bea891b76 100644
--- a/src/gallium/drivers/r600/sb/sb_shader.h
+++ b/src/gallium/drivers/r600/sb/sb_shader.h
@@ -323,7 +323,7 @@ public:
value* get_special_ro_value(unsigned sel);
- value* get_kcache_value(unsigned bank, unsigned index, unsigned chan);
+ value* get_kcache_value(unsigned bank, unsigned index, unsigned chan, alu_kcache_index_mode index_mode);
value* get_value_version(value* v, unsigned ver);
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 2e9a0135647..ac99e732c94 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -272,6 +272,15 @@ static LLVMValueRef fetch_system_value(
return bitcast(bld_base, type, cval);
}
+static LLVMValueRef si_build_alloca_undef(struct gallivm_state *gallivm,
+ LLVMTypeRef type,
+ const char *name)
+{
+ LLVMValueRef ptr = lp_build_alloca(gallivm, type, name);
+ LLVMBuildStore(gallivm->builder, LLVMGetUndef(type), ptr);
+ return ptr;
+}
+
static void emit_declaration(
struct lp_build_tgsi_context * bld_base,
const struct tgsi_full_declaration *decl)
@@ -285,7 +294,7 @@ static void emit_declaration(
for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
unsigned chan;
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
- ctx->soa.addr[idx][chan] = lp_build_alloca(
+ ctx->soa.addr[idx][chan] = si_build_alloca_undef(
&ctx->gallivm,
ctx->soa.bld_base.uint_bld.elem_type, "");
}
@@ -315,8 +324,9 @@ static void emit_declaration(
for (idx = first; idx <= last; idx++) {
for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
ctx->temps[idx * TGSI_NUM_CHANNELS + i] =
- lp_build_alloca(bld_base->base.gallivm, bld_base->base.vec_type,
- "temp");
+ si_build_alloca_undef(bld_base->base.gallivm,
+ bld_base->base.vec_type,
+ "temp");
}
}
break;
@@ -347,7 +357,8 @@ static void emit_declaration(
unsigned chan;
assert(idx < RADEON_LLVM_MAX_OUTPUTS);
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
- ctx->soa.outputs[idx][chan] = lp_build_alloca(&ctx->gallivm,
+ ctx->soa.outputs[idx][chan] = si_build_alloca_undef(
+ &ctx->gallivm,
ctx->soa.bld_base.base.elem_type, "");
}
}
@@ -908,7 +919,21 @@ static void emit_ucmp(
LLVMBuildSelect(builder, v, emit_data->args[1], emit_data->args[2], "");
}
-static void emit_cmp(
+static void emit_cmp(const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ LLVMValueRef cond, *args = emit_data->args;
+
+ cond = LLVMBuildFCmp(builder, LLVMRealOLT, args[0],
+ bld_base->base.zero, "");
+
+ emit_data->output[emit_data->chan] =
+ LLVMBuildSelect(builder, cond, args[1], args[2], "");
+}
+
+static void emit_set_cond(
const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
@@ -1382,6 +1407,51 @@ static void emit_imsb(const struct lp_build_tgsi_action * action,
LLVMBuildSelect(builder, cond, all_ones, msb, "");
}
+static void emit_iabs(const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+
+ emit_data->output[emit_data->chan] =
+ lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_IMAX,
+ emit_data->args[0],
+ LLVMBuildNeg(builder,
+ emit_data->args[0], ""));
+}
+
+static void emit_minmax_int(const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ LLVMIntPredicate op;
+
+ switch (emit_data->info->opcode) {
+ default:
+ assert(0);
+ case TGSI_OPCODE_IMAX:
+ op = LLVMIntSGT;
+ break;
+ case TGSI_OPCODE_IMIN:
+ op = LLVMIntSLT;
+ break;
+ case TGSI_OPCODE_UMAX:
+ op = LLVMIntUGT;
+ break;
+ case TGSI_OPCODE_UMIN:
+ op = LLVMIntULT;
+ break;
+ }
+
+ emit_data->output[emit_data->chan] =
+ LLVMBuildSelect(builder,
+ LLVMBuildICmp(builder, op, emit_data->args[0],
+ emit_data->args[1], ""),
+ emit_data->args[0],
+ emit_data->args[1], "");
+}
+
void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
{
struct lp_type type;
@@ -1447,8 +1517,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "llvm.ceil.f32";
bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_CLAMP].intr_name = "llvm.AMDIL.clamp.";
- bld_base->op_actions[TGSI_OPCODE_CMP].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_CMP].intr_name = "llvm.AMDGPU.cndlt";
+ bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cmp;
bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32";
@@ -1470,7 +1539,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.AMDIL.exp.";
+ bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.exp2.f32";
bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.floor.f32";
bld_base->op_actions[TGSI_OPCODE_FMA].emit = build_tgsi_intrinsic_nomem;
@@ -1482,17 +1551,14 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
bld_base->op_actions[TGSI_OPCODE_FSGE].emit = emit_fcmp;
bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp;
bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp;
- bld_base->op_actions[TGSI_OPCODE_IABS].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_IABS].intr_name = "llvm.AMDIL.abs.";
+ bld_base->op_actions[TGSI_OPCODE_IABS].emit = emit_iabs;
bld_base->op_actions[TGSI_OPCODE_IBFE].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_IBFE].intr_name = "llvm.AMDGPU.bfe.i32";
bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
- bld_base->op_actions[TGSI_OPCODE_IMAX].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_IMAX].intr_name = "llvm.AMDGPU.imax";
- bld_base->op_actions[TGSI_OPCODE_IMIN].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_IMIN].intr_name = "llvm.AMDGPU.imin";
+ bld_base->op_actions[TGSI_OPCODE_IMAX].emit = emit_minmax_int;
+ bld_base->op_actions[TGSI_OPCODE_IMIN].emit = emit_minmax_int;
bld_base->op_actions[TGSI_OPCODE_IMSB].emit = emit_imsb;
bld_base->op_actions[TGSI_OPCODE_INEG].emit = emit_ineg;
bld_base->op_actions[TGSI_OPCODE_ISHR].emit = emit_ishr;
@@ -1508,8 +1574,6 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
bld_base->op_actions[TGSI_OPCODE_LSB].emit = emit_lsb;
bld_base->op_actions[TGSI_OPCODE_LG2].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_LG2].intr_name = "llvm.log2.f32";
- bld_base->op_actions[TGSI_OPCODE_LRP].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_LRP].intr_name = "llvm.AMDGPU.lrp";
bld_base->op_actions[TGSI_OPCODE_MOD].emit = emit_mod;
bld_base->op_actions[TGSI_OPCODE_UMSB].emit = emit_umsb;
bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not;
@@ -1519,31 +1583,29 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32";
bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.AMDIL.round.nearest.";
+ bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.rint.f32";
bld_base->op_actions[TGSI_OPCODE_RSQ].intr_name = "llvm.AMDGPU.rsq.clamped.f32";
bld_base->op_actions[TGSI_OPCODE_RSQ].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_cmp;
- bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_cmp;
+ bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_set_cond;
+ bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_set_cond;
bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl;
- bld_base->op_actions[TGSI_OPCODE_SLE].emit = emit_cmp;
- bld_base->op_actions[TGSI_OPCODE_SLT].emit = emit_cmp;
- bld_base->op_actions[TGSI_OPCODE_SNE].emit = emit_cmp;
- bld_base->op_actions[TGSI_OPCODE_SGT].emit = emit_cmp;
+ bld_base->op_actions[TGSI_OPCODE_SLE].emit = emit_set_cond;
+ bld_base->op_actions[TGSI_OPCODE_SLT].emit = emit_set_cond;
+ bld_base->op_actions[TGSI_OPCODE_SNE].emit = emit_set_cond;
+ bld_base->op_actions[TGSI_OPCODE_SGT].emit = emit_set_cond;
bld_base->op_actions[TGSI_OPCODE_SIN].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_SIN].intr_name = "llvm.sin.f32";
bld_base->op_actions[TGSI_OPCODE_SQRT].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_SQRT].intr_name = "llvm.sqrt.f32";
bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg;
bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.AMDGPU.trunc";
+ bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.trunc.f32";
bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
bld_base->op_actions[TGSI_OPCODE_UBFE].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_UBFE].intr_name = "llvm.AMDGPU.bfe.u32";
bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv;
- bld_base->op_actions[TGSI_OPCODE_UMAX].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_UMAX].intr_name = "llvm.AMDGPU.umax";
- bld_base->op_actions[TGSI_OPCODE_UMIN].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_UMIN].intr_name = "llvm.AMDGPU.umin";
+ bld_base->op_actions[TGSI_OPCODE_UMAX].emit = emit_minmax_int;
+ bld_base->op_actions[TGSI_OPCODE_UMIN].emit = emit_minmax_int;
bld_base->op_actions[TGSI_OPCODE_UMOD].emit = emit_umod;
bld_base->op_actions[TGSI_OPCODE_USEQ].emit = emit_icmp;
bld_base->op_actions[TGSI_OPCODE_USGE].emit = emit_icmp;
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index c6605346771..697e60a50d9 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -469,7 +469,7 @@ static void si_delete_compute_state(struct pipe_context *ctx, void* state){
if (program->kernels) {
for (int i = 0; i < program->num_kernels; i++){
if (program->kernels[i].bo){
- si_shader_destroy(ctx, &program->kernels[i]);
+ si_shader_destroy(&program->kernels[i]);
}
}
FREE(program->kernels);
@@ -482,7 +482,7 @@ static void si_delete_compute_state(struct pipe_context *ctx, void* state){
FREE(program->shader.binary.config);
FREE(program->shader.binary.rodata);
FREE(program->shader.binary.global_symbol_offsets);
- si_shader_destroy(ctx, &program->shader);
+ si_shader_destroy(&program->shader);
#endif
pipe_resource_reference(
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index a0283b7c966..53c80dba602 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -271,6 +271,8 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_START_INSTANCE:
case PIPE_CAP_NPOT_TEXTURES:
case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
+ case PIPE_CAP_VERTEX_COLOR_CLAMPED:
+ case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
case PIPE_CAP_TGSI_INSTANCEID:
case PIPE_CAP_COMPUTE:
@@ -330,8 +332,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
/* Unsupported features. */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
- case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
- case PIPE_CAP_VERTEX_COLOR_CLAMPED:
case PIPE_CAP_USER_VERTEX_BUFFERS:
case PIPE_CAP_FAKE_SW_MSAA:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 32a702fcdf5..a119cbdc16c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1306,6 +1306,23 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
unsigned compressed = 0;
unsigned chan;
+ /* XXX: This controls which components of the output
+ * registers actually get exported. (e.g bit 0 means export
+ * X component, bit 1 means export Y component, etc.) I'm
+ * hard coding this to 0xf for now. In the future, we might
+ * want to do something else.
+ */
+ args[0] = lp_build_const_int32(base->gallivm, 0xf);
+
+ /* Specify whether the EXEC mask represents the valid mask */
+ args[1] = uint->zero;
+
+ /* Specify whether this is the last export */
+ args[2] = uint->zero;
+
+ /* Specify the target we are exporting */
+ args[3] = lp_build_const_int32(base->gallivm, target);
+
if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
int cbuf = target - V_008DFC_SQ_EXP_MRT;
@@ -1323,55 +1340,31 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
}
}
+ /* Set COMPR flag */
+ args[4] = compressed ? uint->one : uint->zero;
+
if (compressed) {
/* Pixel shader needs to pack output values before export */
- for (chan = 0; chan < 2; chan++ ) {
- args[0] = values[2 * chan];
- args[1] = values[2 * chan + 1];
- args[chan + 5] =
- lp_build_intrinsic(base->gallivm->builder,
- "llvm.SI.packf16",
- LLVMInt32TypeInContext(base->gallivm->context),
- args, 2,
- LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
+ for (chan = 0; chan < 2; chan++) {
+ LLVMValueRef pack_args[2] = {
+ values[2 * chan],
+ values[2 * chan + 1]
+ };
+ LLVMValueRef packed;
+
+ packed = lp_build_intrinsic(base->gallivm->builder,
+ "llvm.SI.packf16",
+ LLVMInt32TypeInContext(base->gallivm->context),
+ pack_args, 2,
+ LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
args[chan + 7] = args[chan + 5] =
LLVMBuildBitCast(base->gallivm->builder,
- args[chan + 5],
+ packed,
LLVMFloatTypeInContext(base->gallivm->context),
"");
}
-
- /* Set COMPR flag */
- args[4] = uint->one;
- } else {
- for (chan = 0; chan < 4; chan++ )
- /* +5 because the first output value will be
- * the 6th argument to the intrinsic. */
- args[chan + 5] = values[chan];
-
- /* Clear COMPR flag */
- args[4] = uint->zero;
- }
-
- /* XXX: This controls which components of the output
- * registers actually get exported. (e.g bit 0 means export
- * X component, bit 1 means export Y component, etc.) I'm
- * hard coding this to 0xf for now. In the future, we might
- * want to do something else. */
- args[0] = lp_build_const_int32(base->gallivm, 0xf);
-
- /* Specify whether the EXEC mask represents the valid mask */
- args[1] = uint->zero;
-
- /* Specify whether this is the last export */
- args[2] = uint->zero;
-
- /* Specify the target we are exporting */
- args[3] = lp_build_const_int32(base->gallivm, target);
-
- /* XXX: We probably need to keep track of the output
- * values, so we know what we are passing to the next
- * stage. */
+ } else
+ memcpy(&args[5], values, sizeof(values[0]) * 4);
}
/* Load from output pointers and initialize arguments for the shader export intrinsic */
@@ -2083,6 +2076,45 @@ static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context * bld_base)
outputs = MALLOC((info->num_outputs + 1) * sizeof(outputs[0]));
+ /* Vertex color clamping.
+ *
+ * This uses a state constant loaded in a user data SGPR and
+ * an IF statement is added that clamps all colors if the constant
+ * is true.
+ */
+ if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX &&
+ !si_shader_ctx->shader->is_gs_copy_shader) {
+ struct lp_build_if_state if_ctx;
+ LLVMValueRef cond = NULL;
+ LLVMValueRef addr, val;
+
+ for (i = 0; i < info->num_outputs; i++) {
+ if (info->output_semantic_name[i] != TGSI_SEMANTIC_COLOR &&
+ info->output_semantic_name[i] != TGSI_SEMANTIC_BCOLOR)
+ continue;
+
+ /* We've found a color. */
+ if (!cond) {
+ /* The state is in the first bit of the user SGPR. */
+ cond = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+ SI_PARAM_VS_STATE_BITS);
+ cond = LLVMBuildTrunc(gallivm->builder, cond,
+ LLVMInt1TypeInContext(gallivm->context), "");
+ lp_build_if(&if_ctx, gallivm, cond);
+ }
+
+ for (j = 0; j < 4; j++) {
+ addr = si_shader_ctx->radeon_bld.soa.outputs[i][j];
+ val = LLVMBuildLoad(gallivm->builder, addr, "");
+ val = radeon_llvm_saturate(bld_base, val);
+ LLVMBuildStore(gallivm->builder, val, addr);
+ }
+ }
+
+ if (cond)
+ lp_build_endif(&if_ctx);
+ }
+
for (i = 0; i < info->num_outputs; i++) {
outputs[i].name = info->output_semantic_name[i];
outputs[i].sid = info->output_semantic_index[i];
@@ -2117,6 +2149,7 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base)
struct lp_build_context * base = &bld_base->base;
struct lp_build_context * uint = &bld_base->uint_bld;
struct tgsi_shader_info *info = &shader->selector->info;
+ LLVMBuilderRef builder = base->gallivm->builder;
LLVMValueRef args[9];
LLVMValueRef last_args[9] = { 0 };
int depth_index = -1, stencil_index = -1, samplemask_index = -1;
@@ -2143,6 +2176,16 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base)
target = V_008DFC_SQ_EXP_MRT + semantic_index;
alpha_ptr = si_shader_ctx->radeon_bld.soa.outputs[i][3];
+ if (si_shader_ctx->shader->key.ps.clamp_color) {
+ for (int j = 0; j < 4; j++) {
+ LLVMValueRef ptr = si_shader_ctx->radeon_bld.soa.outputs[i][j];
+ LLVMValueRef result = LLVMBuildLoad(builder, ptr, "");
+
+ result = radeon_llvm_saturate(bld_base, result);
+ LLVMBuildStore(builder, result, ptr);
+ }
+ }
+
if (si_shader_ctx->shader->key.ps.alpha_to_one)
LLVMBuildStore(base->gallivm->builder,
base->one, alpha_ptr);
@@ -2153,6 +2196,7 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base)
if (si_shader_ctx->shader->key.ps.poly_line_smoothing)
si_scale_alpha_by_sample_mask(bld_base, alpha_ptr);
+
break;
default:
target = 0;
@@ -3440,6 +3484,9 @@ static void create_function(struct si_shader_context *si_shader_ctx)
if (shader->is_gs_copy_shader) {
last_array_pointer = SI_PARAM_CONST;
num_params = SI_PARAM_CONST+1;
+ } else {
+ params[SI_PARAM_VS_STATE_BITS] = i32;
+ num_params = SI_PARAM_VS_STATE_BITS+1;
}
/* The locations of the other parameters are assigned dynamically. */
@@ -3982,6 +4029,7 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f)
key->vs.es_enabled_outputs);
fprintf(f, " as_es = %u\n", key->vs.as_es);
fprintf(f, " as_ls = %u\n", key->vs.as_ls);
+ fprintf(f, " export_prim_id = %u\n", key->vs.export_prim_id);
break;
case PIPE_SHADER_TESS_CTRL:
@@ -3993,6 +4041,7 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f)
fprintf(f, " es_enabled_outputs = 0x%"PRIx64"\n",
key->tes.es_enabled_outputs);
fprintf(f, " as_es = %u\n", key->tes.as_es);
+ fprintf(f, " export_prim_id = %u\n", key->tes.export_prim_id);
break;
case PIPE_SHADER_GEOMETRY:
@@ -4005,6 +4054,7 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f)
fprintf(f, " alpha_func = %u\n", key->ps.alpha_func);
fprintf(f, " alpha_to_one = %u\n", key->ps.alpha_to_one);
fprintf(f, " poly_stipple = %u\n", key->ps.poly_stipple);
+ fprintf(f, " clamp_color = %u\n", key->ps.clamp_color);
break;
default:
@@ -4196,10 +4246,12 @@ out:
return r;
}
-void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader)
+void si_shader_destroy(struct si_shader *shader)
{
- if (shader->gs_copy_shader)
- si_shader_destroy(ctx, shader->gs_copy_shader);
+ if (shader->gs_copy_shader) {
+ si_shader_destroy(shader->gs_copy_shader);
+ FREE(shader->gs_copy_shader);
+ }
if (shader->scratch_bo)
r600_resource_reference(&shader->scratch_bo, NULL);
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index b92fa02a171..54dad726d01 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -83,6 +83,7 @@ struct radeon_shader_reloc;
#define SI_SGPR_VERTEX_BUFFER 8 /* VS only */
#define SI_SGPR_BASE_VERTEX 10 /* VS only */
#define SI_SGPR_START_INSTANCE 11 /* VS only */
+#define SI_SGPR_VS_STATE_BITS 12 /* VS(VS) only */
#define SI_SGPR_LS_OUT_LAYOUT 12 /* VS(LS) only */
#define SI_SGPR_TCS_OUT_OFFSETS 8 /* TCS & TES only */
#define SI_SGPR_TCS_OUT_LAYOUT 9 /* TCS & TES only */
@@ -90,8 +91,9 @@ struct radeon_shader_reloc;
#define SI_SGPR_ALPHA_REF 8 /* PS only */
#define SI_SGPR_PS_STATE_BITS 9 /* PS only */
-#define SI_VS_NUM_USER_SGPR 12
-#define SI_LS_NUM_USER_SGPR 13
+#define SI_VS_NUM_USER_SGPR 13 /* API VS */
+#define SI_ES_NUM_USER_SGPR 12 /* API VS */
+#define SI_LS_NUM_USER_SGPR 13 /* API VS */
#define SI_TCS_NUM_USER_SGPR 11
#define SI_TES_NUM_USER_SGPR 10
#define SI_GS_NUM_USER_SGPR 8
@@ -108,6 +110,8 @@ struct radeon_shader_reloc;
#define SI_PARAM_VERTEX_BUFFER 4
#define SI_PARAM_BASE_VERTEX 5
#define SI_PARAM_START_INSTANCE 6
+/* [0] = clamp vertex color */
+#define SI_PARAM_VS_STATE_BITS 7
/* the other VS parameters are assigned dynamically */
/* Offsets where TCS outputs and TCS patch outputs live in LDS:
@@ -227,6 +231,7 @@ union si_shader_key {
unsigned alpha_to_one:1;
unsigned poly_stipple:1;
unsigned poly_line_smoothing:1;
+ unsigned clamp_color:1;
} ps;
struct {
unsigned instance_divisors[SI_NUM_VERTEX_BUFFERS];
@@ -324,7 +329,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f);
int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
LLVMTargetMachineRef tm, LLVMModuleRef mod);
-void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader);
+void si_shader_destroy(struct si_shader *shader);
unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader);
int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader);
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 00d4bc1fbc2..e6475364f98 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -694,7 +694,7 @@ static void *si_create_rs_state(struct pipe_context *ctx,
rs->poly_smooth = state->poly_smooth;
rs->uses_poly_offset = state->offset_point || state->offset_line ||
state->offset_tri;
-
+ rs->clamp_fragment_color = state->clamp_fragment_color;
rs->flatshade = state->flatshade;
rs->sprite_coord_enable = state->sprite_coord_enable;
rs->pa_sc_line_stipple = state->line_stipple_enable ?
@@ -760,6 +760,8 @@ static void *si_create_rs_state(struct pipe_context *ctx,
state->fill_back != PIPE_POLYGON_MODE_FILL) |
S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) |
S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back)));
+ si_pm4_set_reg(pm4, R_00B130_SPI_SHADER_USER_DATA_VS_0 +
+ SI_SGPR_VS_STATE_BITS * 4, state->clamp_vertex_color);
/* Precalculate polygon offset states for 16-bit, 24-bit, and 32-bit zbuffers. */
for (i = 0; i < 3; i++) {
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 6a567688ee4..fba6619d2fd 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -60,6 +60,7 @@ struct si_state_rasterizer {
bool line_smooth;
bool poly_smooth;
bool uses_poly_offset;
+ bool clamp_fragment_color;
};
struct si_dsa_stencil_ref_part {
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index f673388b121..c98509bb0b9 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -122,7 +122,8 @@ static void si_shader_ls(struct si_shader *shader)
shader->ls_rsrc1 = S_00B528_VGPRS((shader->num_vgprs - 1) / 4) |
S_00B528_SGPRS((num_sgprs - 1) / 8) |
- S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt);
+ S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) |
+ S_00B528_DX10_CLAMP(shader->dx10_clamp_mode);
shader->ls_rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs) |
S_00B52C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0);
}
@@ -154,7 +155,8 @@ static void si_shader_hs(struct si_shader *shader)
si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40);
si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
S_00B428_VGPRS((shader->num_vgprs - 1) / 4) |
- S_00B428_SGPRS((num_sgprs - 1) / 8));
+ S_00B428_SGPRS((num_sgprs - 1) / 8) |
+ S_00B428_DX10_CLAMP(shader->dx10_clamp_mode));
si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
S_00B42C_USER_SGPR(num_user_sgprs) |
S_00B42C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
@@ -177,7 +179,7 @@ static void si_shader_es(struct si_shader *shader)
if (shader->selector->type == PIPE_SHADER_VERTEX) {
vgpr_comp_cnt = shader->uses_instanceid ? 3 : 0;
- num_user_sgprs = SI_VS_NUM_USER_SGPR;
+ num_user_sgprs = SI_ES_NUM_USER_SGPR;
} else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
vgpr_comp_cnt = 3; /* all components are needed for TES */
num_user_sgprs = SI_TES_NUM_USER_SGPR;
@@ -570,6 +572,7 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
key->ps.poly_line_smoothing = ((is_poly && rs->poly_smooth) ||
(is_line && rs->line_smooth)) &&
sctx->framebuffer.nr_samples <= 1;
+ key->ps.clamp_color = rs->clamp_fragment_color;
}
key->ps.alpha_func = PIPE_FUNC_ALWAYS;
@@ -645,9 +648,8 @@ static int si_shader_select(struct pipe_context *ctx,
return 0;
}
-static void *si_create_shader_state(struct pipe_context *ctx,
- const struct pipe_shader_state *state,
- unsigned pipe_shader_type)
+static void *si_create_shader_selector(struct pipe_context *ctx,
+ const struct pipe_shader_state *state)
{
struct si_screen *sscreen = (struct si_screen *)ctx->screen;
struct si_shader_selector *sel = CALLOC_STRUCT(si_shader_selector);
@@ -656,7 +658,6 @@ static void *si_create_shader_state(struct pipe_context *ctx,
if (!sel)
return NULL;
- sel->type = pipe_shader_type;
sel->tokens = tgsi_dup_tokens(state->tokens);
if (!sel->tokens) {
FREE(sel);
@@ -665,6 +666,7 @@ static void *si_create_shader_state(struct pipe_context *ctx,
sel->so = state->stream_output;
tgsi_scan_shader(state->tokens, &sel->info);
+ sel->type = util_pipe_shader_from_tgsi_processor(sel->info.processor);
p_atomic_inc(&sscreen->b.num_shaders_created);
/* First set which opcode uses which (i,j) pair. */
@@ -695,7 +697,7 @@ static void *si_create_shader_state(struct pipe_context *ctx,
sel->info.uses_linear_centroid +
sel->info.uses_linear_sample >= 2;
- switch (pipe_shader_type) {
+ switch (sel->type) {
case PIPE_SHADER_GEOMETRY:
sel->gs_output_prim =
sel->info.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM];
@@ -761,36 +763,6 @@ static void *si_create_shader_state(struct pipe_context *ctx,
return sel;
}
-static void *si_create_fs_state(struct pipe_context *ctx,
- const struct pipe_shader_state *state)
-{
- return si_create_shader_state(ctx, state, PIPE_SHADER_FRAGMENT);
-}
-
-static void *si_create_gs_state(struct pipe_context *ctx,
- const struct pipe_shader_state *state)
-{
- return si_create_shader_state(ctx, state, PIPE_SHADER_GEOMETRY);
-}
-
-static void *si_create_vs_state(struct pipe_context *ctx,
- const struct pipe_shader_state *state)
-{
- return si_create_shader_state(ctx, state, PIPE_SHADER_VERTEX);
-}
-
-static void *si_create_tcs_state(struct pipe_context *ctx,
- const struct pipe_shader_state *state)
-{
- return si_create_shader_state(ctx, state, PIPE_SHADER_TESS_CTRL);
-}
-
-static void *si_create_tes_state(struct pipe_context *ctx,
- const struct pipe_shader_state *state)
-{
- return si_create_shader_state(ctx, state, PIPE_SHADER_TESS_EVAL);
-}
-
/**
* Normally, we only emit 1 viewport and 1 scissor if no shader is using
* the VIEWPORT_INDEX output, and emitting the other viewports and scissors
@@ -905,11 +877,21 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
si_mark_atom_dirty(sctx, &sctx->cb_target_mask);
}
-static void si_delete_shader_selector(struct pipe_context *ctx,
- struct si_shader_selector *sel)
+static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
+ struct si_shader_selector *sel = (struct si_shader_selector *)state;
struct si_shader *p = sel->current, *c;
+ struct si_shader_selector **current_shader[SI_NUM_SHADERS] = {
+ [PIPE_SHADER_VERTEX] = &sctx->vs_shader,
+ [PIPE_SHADER_TESS_CTRL] = &sctx->tcs_shader,
+ [PIPE_SHADER_TESS_EVAL] = &sctx->tes_shader,
+ [PIPE_SHADER_GEOMETRY] = &sctx->gs_shader,
+ [PIPE_SHADER_FRAGMENT] = &sctx->ps_shader,
+ };
+
+ if (*current_shader[sel->type] == sel)
+ *current_shader[sel->type] = NULL;
while (p) {
c = p->next_variant;
@@ -940,7 +922,7 @@ static void si_delete_shader_selector(struct pipe_context *ctx,
break;
}
- si_shader_destroy(ctx, p);
+ si_shader_destroy(p);
free(p);
p = c;
}
@@ -949,66 +931,6 @@ static void si_delete_shader_selector(struct pipe_context *ctx,
free(sel);
}
-static void si_delete_vs_shader(struct pipe_context *ctx, void *state)
-{
- struct si_context *sctx = (struct si_context *)ctx;
- struct si_shader_selector *sel = (struct si_shader_selector *)state;
-
- if (sctx->vs_shader == sel) {
- sctx->vs_shader = NULL;
- }
-
- si_delete_shader_selector(ctx, sel);
-}
-
-static void si_delete_gs_shader(struct pipe_context *ctx, void *state)
-{
- struct si_context *sctx = (struct si_context *)ctx;
- struct si_shader_selector *sel = (struct si_shader_selector *)state;
-
- if (sctx->gs_shader == sel) {
- sctx->gs_shader = NULL;
- }
-
- si_delete_shader_selector(ctx, sel);
-}
-
-static void si_delete_ps_shader(struct pipe_context *ctx, void *state)
-{
- struct si_context *sctx = (struct si_context *)ctx;
- struct si_shader_selector *sel = (struct si_shader_selector *)state;
-
- if (sctx->ps_shader == sel) {
- sctx->ps_shader = NULL;
- }
-
- si_delete_shader_selector(ctx, sel);
-}
-
-static void si_delete_tcs_shader(struct pipe_context *ctx, void *state)
-{
- struct si_context *sctx = (struct si_context *)ctx;
- struct si_shader_selector *sel = (struct si_shader_selector *)state;
-
- if (sctx->tcs_shader == sel) {
- sctx->tcs_shader = NULL;
- }
-
- si_delete_shader_selector(ctx, sel);
-}
-
-static void si_delete_tes_shader(struct pipe_context *ctx, void *state)
-{
- struct si_context *sctx = (struct si_context *)ctx;
- struct si_shader_selector *sel = (struct si_shader_selector *)state;
-
- if (sctx->tes_shader == sel) {
- sctx->tes_shader = NULL;
- }
-
- si_delete_shader_selector(ctx, sel);
-}
-
static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom)
{
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
@@ -1284,30 +1206,23 @@ static int si_update_scratch_buffer(struct si_context *sctx,
static unsigned si_get_current_scratch_buffer_size(struct si_context *sctx)
{
- if (!sctx->scratch_buffer)
- return 0;
-
- return sctx->scratch_buffer->b.b.width0;
+ return sctx->scratch_buffer ? sctx->scratch_buffer->b.b.width0 : 0;
}
-static unsigned si_get_scratch_buffer_bytes_per_wave(struct si_context *sctx,
- struct si_shader_selector *sel)
+static unsigned si_get_scratch_buffer_bytes_per_wave(struct si_shader_selector *sel)
{
- if (!sel)
- return 0;
-
- return sel->current->scratch_bytes_per_wave;
+ return sel ? sel->current->scratch_bytes_per_wave : 0;
}
static unsigned si_get_max_scratch_bytes_per_wave(struct si_context *sctx)
{
unsigned bytes = 0;
- bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx, sctx->ps_shader));
- bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx, sctx->gs_shader));
- bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx, sctx->vs_shader));
- bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx, sctx->tcs_shader));
- bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx, sctx->tes_shader));
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->ps_shader));
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->gs_shader));
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->vs_shader));
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tcs_shader));
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tes_shader));
return bytes;
}
@@ -1322,7 +1237,6 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx)
int r;
if (scratch_needed_size > 0) {
-
if (scratch_needed_size > current_scratch_buffer_size) {
/* Create a bigger scratch buffer */
pipe_resource_reference(
@@ -1361,38 +1275,26 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx)
si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4);
/* VS can be bound as LS, ES, or VS. */
- if (sctx->tes_shader) {
- r = si_update_scratch_buffer(sctx, sctx->vs_shader);
- if (r < 0)
- return false;
- if (r == 1)
+ r = si_update_scratch_buffer(sctx, sctx->vs_shader);
+ if (r < 0)
+ return false;
+ if (r == 1) {
+ if (sctx->tes_shader)
si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4);
- } else if (sctx->gs_shader) {
- r = si_update_scratch_buffer(sctx, sctx->vs_shader);
- if (r < 0)
- return false;
- if (r == 1)
+ else if (sctx->gs_shader)
si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
- } else {
- r = si_update_scratch_buffer(sctx, sctx->vs_shader);
- if (r < 0)
- return false;
- if (r == 1)
+ else
si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
}
/* TES can be bound as ES or VS. */
- if (sctx->gs_shader) {
- r = si_update_scratch_buffer(sctx, sctx->tes_shader);
- if (r < 0)
- return false;
- if (r == 1)
+ r = si_update_scratch_buffer(sctx, sctx->tes_shader);
+ if (r < 0)
+ return false;
+ if (r == 1) {
+ if (sctx->gs_shader)
si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4);
- } else {
- r = si_update_scratch_buffer(sctx, sctx->tes_shader);
- if (r < 0)
- return false;
- if (r == 1)
+ else
si_pm4_bind_state(sctx, vs, sctx->tes_shader->current->pm4);
}
}
@@ -1661,11 +1563,11 @@ void si_init_shader_functions(struct si_context *sctx)
si_init_atom(sctx, &sctx->spi_map, &sctx->atoms.s.spi_map, si_emit_spi_map);
si_init_atom(sctx, &sctx->spi_ps_input, &sctx->atoms.s.spi_ps_input, si_emit_spi_ps_input);
- sctx->b.b.create_vs_state = si_create_vs_state;
- sctx->b.b.create_tcs_state = si_create_tcs_state;
- sctx->b.b.create_tes_state = si_create_tes_state;
- sctx->b.b.create_gs_state = si_create_gs_state;
- sctx->b.b.create_fs_state = si_create_fs_state;
+ sctx->b.b.create_vs_state = si_create_shader_selector;
+ sctx->b.b.create_tcs_state = si_create_shader_selector;
+ sctx->b.b.create_tes_state = si_create_shader_selector;
+ sctx->b.b.create_gs_state = si_create_shader_selector;
+ sctx->b.b.create_fs_state = si_create_shader_selector;
sctx->b.b.bind_vs_state = si_bind_vs_shader;
sctx->b.b.bind_tcs_state = si_bind_tcs_shader;
@@ -1673,9 +1575,9 @@ void si_init_shader_functions(struct si_context *sctx)
sctx->b.b.bind_gs_state = si_bind_gs_shader;
sctx->b.b.bind_fs_state = si_bind_ps_shader;
- sctx->b.b.delete_vs_state = si_delete_vs_shader;
- sctx->b.b.delete_tcs_state = si_delete_tcs_shader;
- sctx->b.b.delete_tes_state = si_delete_tes_shader;
- sctx->b.b.delete_gs_state = si_delete_gs_shader;
- sctx->b.b.delete_fs_state = si_delete_ps_shader;
+ sctx->b.b.delete_vs_state = si_delete_shader_selector;
+ sctx->b.b.delete_tcs_state = si_delete_shader_selector;
+ sctx->b.b.delete_tes_state = si_delete_shader_selector;
+ sctx->b.b.delete_gs_state = si_delete_shader_selector;
+ sctx->b.b.delete_fs_state = si_delete_shader_selector;
}
diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c
index 2bf795de22d..f8622b96f45 100644
--- a/src/gallium/drivers/svga/svga_context.c
+++ b/src/gallium/drivers/svga/svga_context.c
@@ -312,6 +312,8 @@ void svga_context_flush( struct svga_context *svga,
*/
svga->swc->flush(svga->swc, &fence);
+ svga->hud.num_flushes++;
+
svga_screen_cache_flush(svgascreen, fence);
/* To force the re-emission of rendertargets and texture sampler bindings on
diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h
index e8575f36c3b..bcce18a3502 100644
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -44,10 +44,21 @@
/** Non-GPU queries for gallium HUD */
-#define SVGA_QUERY_DRAW_CALLS (PIPE_QUERY_DRIVER_SPECIFIC + 0)
-#define SVGA_QUERY_FALLBACKS (PIPE_QUERY_DRIVER_SPECIFIC + 1)
-#define SVGA_QUERY_MEMORY_USED (PIPE_QUERY_DRIVER_SPECIFIC + 2)
-#define SVGA_QUERY_MAX (PIPE_QUERY_DRIVER_SPECIFIC + 3)
+/* per-frame counters */
+#define SVGA_QUERY_NUM_DRAW_CALLS (PIPE_QUERY_DRIVER_SPECIFIC + 0)
+#define SVGA_QUERY_NUM_FALLBACKS (PIPE_QUERY_DRIVER_SPECIFIC + 1)
+#define SVGA_QUERY_NUM_FLUSHES (PIPE_QUERY_DRIVER_SPECIFIC + 2)
+#define SVGA_QUERY_NUM_VALIDATIONS (PIPE_QUERY_DRIVER_SPECIFIC + 3)
+#define SVGA_QUERY_MAP_BUFFER_TIME (PIPE_QUERY_DRIVER_SPECIFIC + 4)
+#define SVGA_QUERY_NUM_RESOURCES_MAPPED (PIPE_QUERY_DRIVER_SPECIFIC + 5)
+/* running total counters */
+#define SVGA_QUERY_MEMORY_USED (PIPE_QUERY_DRIVER_SPECIFIC + 6)
+#define SVGA_QUERY_NUM_SHADERS (PIPE_QUERY_DRIVER_SPECIFIC + 7)
+#define SVGA_QUERY_NUM_RESOURCES (PIPE_QUERY_DRIVER_SPECIFIC + 8)
+#define SVGA_QUERY_NUM_STATE_OBJECTS (PIPE_QUERY_DRIVER_SPECIFIC + 9)
+#define SVGA_QUERY_NUM_SURFACE_VIEWS (PIPE_QUERY_DRIVER_SPECIFIC + 10)
+/* SVGA_QUERY_MAX has to be last because it is the size of an array */
+#define SVGA_QUERY_MAX (PIPE_QUERY_DRIVER_SPECIFIC + 11)
/**
* Maximum supported number of constant buffers per shader
@@ -463,9 +474,18 @@ struct svga_context
/** List of buffers with queued transfers */
struct list_head dirty_buffers;
- /** performance / info queries */
- uint64_t num_draw_calls; /**< SVGA_QUERY_DRAW_CALLS */
- uint64_t num_fallbacks; /**< SVGA_QUERY_FALLBACKS */
+ /** performance / info queries for HUD */
+ struct {
+ uint64_t num_draw_calls; /**< SVGA_QUERY_NUM_DRAW_CALLS */
+ uint64_t num_fallbacks; /**< SVGA_QUERY_NUM_FALLBACKS */
+ uint64_t num_flushes; /**< SVGA_QUERY_NUM_FLUSHES */
+ uint64_t num_validations; /**< SVGA_QUERY_NUM_VALIDATIONS */
+ uint64_t map_buffer_time; /**< SVGA_QUERY_MAP_BUFFER_TIME */
+ uint64_t num_resources_mapped; /**< SVGA_QUERY_NUM_RESOURCES_MAPPED */
+ uint64_t num_shaders; /**< SVGA_QUERY_NUM_SHADERS */
+ uint64_t num_state_objects; /**< SVGA_QUERY_NUM_STATE_OBJECTS */
+ uint64_t num_surface_views; /**< SVGA_QUERY_NUM_SURFACE_VIEWS */
+ } hud;
/** The currently bound stream output targets */
unsigned num_so_targets;
diff --git a/src/gallium/drivers/svga/svga_pipe_blend.c b/src/gallium/drivers/svga/svga_pipe_blend.c
index 06bb3e3bd7e..0c9d6129b53 100644
--- a/src/gallium/drivers/svga/svga_pipe_blend.c
+++ b/src/gallium/drivers/svga/svga_pipe_blend.c
@@ -321,6 +321,8 @@ svga_create_blend_state(struct pipe_context *pipe,
define_blend_state_object(svga, blend);
}
+ svga->hud.num_state_objects++;
+
return blend;
}
@@ -359,6 +361,7 @@ static void svga_delete_blend_state(struct pipe_context *pipe,
}
FREE(blend);
+ svga->hud.num_state_objects--;
}
static void svga_set_blend_color( struct pipe_context *pipe,
diff --git a/src/gallium/drivers/svga/svga_pipe_depthstencil.c b/src/gallium/drivers/svga/svga_pipe_depthstencil.c
index 5ea623be4d9..d84ed1df48e 100644
--- a/src/gallium/drivers/svga/svga_pipe_depthstencil.c
+++ b/src/gallium/drivers/svga/svga_pipe_depthstencil.c
@@ -202,6 +202,8 @@ svga_create_depth_stencil_state(struct pipe_context *pipe,
define_depth_stencil_state_object(svga, ds);
}
+ svga->hud.num_state_objects++;
+
return ds;
}
@@ -248,6 +250,7 @@ static void svga_delete_depth_stencil_state(struct pipe_context *pipe,
}
FREE(depth_stencil);
+ svga->hud.num_state_objects--;
}
diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c
index 303d4565cdb..50ebb53df90 100644
--- a/src/gallium/drivers/svga/svga_pipe_draw.c
+++ b/src/gallium/drivers/svga/svga_pipe_draw.c
@@ -177,7 +177,7 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
enum pipe_error ret = 0;
boolean needed_swtnl;
- svga->num_draw_calls++; /* for SVGA_QUERY_DRAW_CALLS */
+ svga->hud.num_draw_calls++; /* for SVGA_QUERY_NUM_DRAW_CALLS */
if (u_reduced_prim(info->mode) == PIPE_PRIM_TRIANGLES &&
svga->curr.rast->templ.cull_face == PIPE_FACE_FRONT_AND_BACK)
@@ -219,7 +219,7 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
#endif
if (svga->state.sw.need_swtnl) {
- svga->num_fallbacks++; /* for SVGA_QUERY_FALLBACKS */
+ svga->hud.num_fallbacks++; /* for SVGA_QUERY_NUM_FALLBACKS */
if (!needed_swtnl) {
/*
* We're switching from HW to SW TNL. SW TNL will require mapping all
diff --git a/src/gallium/drivers/svga/svga_pipe_query.c b/src/gallium/drivers/svga/svga_pipe_query.c
index 7081e5a1c43..8b9818334ca 100644
--- a/src/gallium/drivers/svga/svga_pipe_query.c
+++ b/src/gallium/drivers/svga/svga_pipe_query.c
@@ -720,9 +720,17 @@ svga_create_query(struct pipe_context *pipe,
define_query_vgpu10(svga, sq,
sizeof(SVGADXTimestampQueryResult));
break;
- case SVGA_QUERY_DRAW_CALLS:
- case SVGA_QUERY_FALLBACKS:
+ case SVGA_QUERY_NUM_DRAW_CALLS:
+ case SVGA_QUERY_NUM_FALLBACKS:
+ case SVGA_QUERY_NUM_FLUSHES:
case SVGA_QUERY_MEMORY_USED:
+ case SVGA_QUERY_NUM_SHADERS:
+ case SVGA_QUERY_NUM_RESOURCES:
+ case SVGA_QUERY_NUM_STATE_OBJECTS:
+ case SVGA_QUERY_NUM_VALIDATIONS:
+ case SVGA_QUERY_MAP_BUFFER_TIME:
+ case SVGA_QUERY_NUM_SURFACE_VIEWS:
+ case SVGA_QUERY_NUM_RESOURCES_MAPPED:
break;
default:
assert(!"unexpected query type in svga_create_query()");
@@ -778,9 +786,17 @@ svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
destroy_query_vgpu10(svga, sq);
sws->fence_reference(sws, &sq->fence, NULL);
break;
- case SVGA_QUERY_DRAW_CALLS:
- case SVGA_QUERY_FALLBACKS:
+ case SVGA_QUERY_NUM_DRAW_CALLS:
+ case SVGA_QUERY_NUM_FALLBACKS:
+ case SVGA_QUERY_NUM_FLUSHES:
case SVGA_QUERY_MEMORY_USED:
+ case SVGA_QUERY_NUM_SHADERS:
+ case SVGA_QUERY_NUM_RESOURCES:
+ case SVGA_QUERY_NUM_STATE_OBJECTS:
+ case SVGA_QUERY_NUM_VALIDATIONS:
+ case SVGA_QUERY_MAP_BUFFER_TIME:
+ case SVGA_QUERY_NUM_SURFACE_VIEWS:
+ case SVGA_QUERY_NUM_RESOURCES_MAPPED:
/* nothing */
break;
default:
@@ -842,13 +858,29 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q)
ret = begin_query_vgpu10(svga, sq);
assert(ret == PIPE_OK);
break;
- case SVGA_QUERY_DRAW_CALLS:
- sq->begin_count = svga->num_draw_calls;
+ case SVGA_QUERY_NUM_DRAW_CALLS:
+ sq->begin_count = svga->hud.num_draw_calls;
break;
- case SVGA_QUERY_FALLBACKS:
- sq->begin_count = svga->num_fallbacks;
+ case SVGA_QUERY_NUM_FALLBACKS:
+ sq->begin_count = svga->hud.num_fallbacks;
+ break;
+ case SVGA_QUERY_NUM_FLUSHES:
+ sq->begin_count = svga->hud.num_flushes;
+ break;
+ case SVGA_QUERY_NUM_VALIDATIONS:
+ sq->begin_count = svga->hud.num_validations;
+ break;
+ case SVGA_QUERY_MAP_BUFFER_TIME:
+ sq->begin_count = svga->hud.map_buffer_time;
+ break;
+ case SVGA_QUERY_NUM_RESOURCES_MAPPED:
+ sq->begin_count = svga->hud.num_resources_mapped;
break;
case SVGA_QUERY_MEMORY_USED:
+ case SVGA_QUERY_NUM_SHADERS:
+ case SVGA_QUERY_NUM_RESOURCES:
+ case SVGA_QUERY_NUM_STATE_OBJECTS:
+ case SVGA_QUERY_NUM_SURFACE_VIEWS:
/* nothing */
break;
default:
@@ -916,13 +948,29 @@ svga_end_query(struct pipe_context *pipe, struct pipe_query *q)
ret = end_query_vgpu10(svga, sq);
assert(ret == PIPE_OK);
break;
- case SVGA_QUERY_DRAW_CALLS:
- sq->end_count = svga->num_draw_calls;
+ case SVGA_QUERY_NUM_DRAW_CALLS:
+ sq->end_count = svga->hud.num_draw_calls;
+ break;
+ case SVGA_QUERY_NUM_FALLBACKS:
+ sq->end_count = svga->hud.num_fallbacks;
+ break;
+ case SVGA_QUERY_NUM_FLUSHES:
+ sq->end_count = svga->hud.num_flushes;
break;
- case SVGA_QUERY_FALLBACKS:
- sq->end_count = svga->num_fallbacks;
+ case SVGA_QUERY_NUM_VALIDATIONS:
+ sq->end_count = svga->hud.num_validations;
+ break;
+ case SVGA_QUERY_MAP_BUFFER_TIME:
+ sq->end_count = svga->hud.map_buffer_time;
+ break;
+ case SVGA_QUERY_NUM_RESOURCES_MAPPED:
+ sq->end_count = svga->hud.num_resources_mapped;
break;
case SVGA_QUERY_MEMORY_USED:
+ case SVGA_QUERY_NUM_SHADERS:
+ case SVGA_QUERY_NUM_RESOURCES:
+ case SVGA_QUERY_NUM_STATE_OBJECTS:
+ case SVGA_QUERY_NUM_SURFACE_VIEWS:
/* nothing */
break;
default:
@@ -1007,13 +1055,30 @@ svga_get_query_result(struct pipe_context *pipe,
*result = (uint64_t)sResult.numPrimitivesWritten;
break;
}
- case SVGA_QUERY_DRAW_CALLS:
- /* fall-through */
- case SVGA_QUERY_FALLBACKS:
+ /* These are per-frame counters */
+ case SVGA_QUERY_NUM_DRAW_CALLS:
+ case SVGA_QUERY_NUM_FALLBACKS:
+ case SVGA_QUERY_NUM_FLUSHES:
+ case SVGA_QUERY_NUM_VALIDATIONS:
+ case SVGA_QUERY_NUM_RESOURCES_MAPPED:
+ case SVGA_QUERY_MAP_BUFFER_TIME:
vresult->u64 = sq->end_count - sq->begin_count;
break;
+ /* These are running total counters */
case SVGA_QUERY_MEMORY_USED:
- vresult->u64 = svgascreen->total_resource_bytes;
+ vresult->u64 = svgascreen->hud.total_resource_bytes;
+ break;
+ case SVGA_QUERY_NUM_SHADERS:
+ vresult->u64 = svga->hud.num_shaders;
+ break;
+ case SVGA_QUERY_NUM_RESOURCES:
+ vresult->u64 = svgascreen->hud.num_resources;
+ break;
+ case SVGA_QUERY_NUM_STATE_OBJECTS:
+ vresult->u64 = svga->hud.num_state_objects;
+ break;
+ case SVGA_QUERY_NUM_SURFACE_VIEWS:
+ vresult->u64 = svga->hud.num_surface_views;
break;
default:
assert(!"unexpected query type in svga_get_query_result");
diff --git a/src/gallium/drivers/svga/svga_pipe_rasterizer.c b/src/gallium/drivers/svga/svga_pipe_rasterizer.c
index a7aadac0111..6310b7a5e86 100644
--- a/src/gallium/drivers/svga/svga_pipe_rasterizer.c
+++ b/src/gallium/drivers/svga/svga_pipe_rasterizer.c
@@ -352,6 +352,8 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
define_rasterizer_object(svga, rast);
}
+ svga->hud.num_state_objects++;
+
return rast;
}
@@ -392,6 +394,7 @@ svga_delete_rasterizer_state(struct pipe_context *pipe, void *state)
}
FREE(state);
+ svga->hud.num_state_objects--;
}
diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c
index 60e2d44ace4..95241176510 100644
--- a/src/gallium/drivers/svga/svga_pipe_sampler.c
+++ b/src/gallium/drivers/svga/svga_pipe_sampler.c
@@ -273,6 +273,8 @@ svga_create_sampler_state(struct pipe_context *pipe,
cso->min_lod, cso->view_min_lod, cso->view_max_lod,
cso->mipfilter == SVGA3D_TEX_FILTER_NONE ? "SVGA3D_TEX_FILTER_NONE" : "SOMETHING");
+ svga->hud.num_state_objects++;
+
return cso;
}
@@ -328,6 +330,7 @@ static void svga_delete_sampler_state(struct pipe_context *pipe,
}
FREE(sampler);
+ svga->hud.num_state_objects--;
}
diff --git a/src/gallium/drivers/svga/svga_pipe_vertex.c b/src/gallium/drivers/svga/svga_pipe_vertex.c
index e0932a9dbc1..b932c568f53 100644
--- a/src/gallium/drivers/svga/svga_pipe_vertex.c
+++ b/src/gallium/drivers/svga/svga_pipe_vertex.c
@@ -274,6 +274,9 @@ svga_create_vertex_elements_state(struct pipe_context *pipe,
translate_vertex_decls(svga, velems);
}
}
+
+ svga->hud.num_state_objects++;
+
return velems;
}
@@ -315,6 +318,7 @@ svga_delete_vertex_elements_state(struct pipe_context *pipe, void *state)
}
FREE(velems);
+ svga->hud.num_state_objects--;
}
void svga_cleanup_vertex_state( struct svga_context *svga )
diff --git a/src/gallium/drivers/svga/svga_resource_buffer.c b/src/gallium/drivers/svga/svga_resource_buffer.c
index 57e37fcfe14..71f2f4f2779 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer.c
+++ b/src/gallium/drivers/svga/svga_resource_buffer.c
@@ -29,6 +29,7 @@
#include "pipe/p_defines.h"
#include "util/u_inlines.h"
#include "os/os_thread.h"
+#include "os/os_time.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_resource.h"
@@ -77,6 +78,7 @@ svga_buffer_transfer_map(struct pipe_context *pipe,
struct svga_buffer *sbuf = svga_buffer(resource);
struct pipe_transfer *transfer;
uint8_t *map;
+ int64_t begin = os_time_get();
transfer = CALLOC_STRUCT(pipe_transfer);
if (transfer == NULL) {
@@ -244,6 +246,8 @@ svga_buffer_transfer_map(struct pipe_context *pipe,
FREE(transfer);
}
+ svga->hud.map_buffer_time += (os_time_get() - begin);
+
return map;
}
@@ -331,7 +335,10 @@ svga_buffer_destroy( struct pipe_screen *screen,
if (sbuf->swbuf && !sbuf->user)
align_free(sbuf->swbuf);
- ss->total_resource_bytes -= sbuf->size;
+ ss->hud.total_resource_bytes -= sbuf->size;
+ assert(ss->hud.num_resources > 0);
+ if (ss->hud.num_resources > 0)
+ ss->hud.num_resources--;
FREE(sbuf);
}
@@ -409,7 +416,9 @@ svga_buffer_create(struct pipe_screen *screen,
(debug_reference_descriptor)debug_describe_resource, 0);
sbuf->size = util_resource_size(&sbuf->b.b);
- ss->total_resource_bytes += sbuf->size;
+ ss->hud.total_resource_bytes += sbuf->size;
+
+ ss->hud.num_resources++;
return &sbuf->b.b;
@@ -427,6 +436,7 @@ svga_user_buffer_create(struct pipe_screen *screen,
unsigned bind)
{
struct svga_buffer *sbuf;
+ struct svga_screen *ss = svga_screen(screen);
sbuf = CALLOC_STRUCT(svga_buffer);
if (!sbuf)
@@ -450,6 +460,8 @@ svga_user_buffer_create(struct pipe_screen *screen,
debug_reference(&sbuf->b.b.reference,
(debug_reference_descriptor)debug_describe_resource, 0);
+ ss->hud.num_resources++;
+
return &sbuf->b.b;
no_sbuf:
diff --git a/src/gallium/drivers/svga/svga_resource_buffer.h b/src/gallium/drivers/svga/svga_resource_buffer.h
index 75e12c3220c..0591f8960b9 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer.h
+++ b/src/gallium/drivers/svga/svga_resource_buffer.h
@@ -253,6 +253,9 @@ svga_buffer_hw_storage_map(struct svga_context *svga,
unsigned flags, boolean *retry)
{
struct svga_winsys_screen *sws = svga_buffer_winsys_screen(sbuf);
+
+ svga->hud.num_resources_mapped++;
+
if (sws->have_gb_objects) {
return svga->swc->surface_map(svga->swc, sbuf->handle, flags, retry);
} else {
diff --git a/src/gallium/drivers/svga/svga_resource_texture.c b/src/gallium/drivers/svga/svga_resource_texture.c
index 90787be8073..a02d1e495ff 100644
--- a/src/gallium/drivers/svga/svga_resource_texture.c
+++ b/src/gallium/drivers/svga/svga_resource_texture.c
@@ -29,6 +29,7 @@
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
#include "os/os_thread.h"
+#include "os/os_time.h"
#include "util/u_format.h"
#include "util/u_inlines.h"
#include "util/u_math.h"
@@ -229,11 +230,15 @@ svga_texture_destroy(struct pipe_screen *screen,
SVGA_DBG(DEBUG_DMA, "unref sid %p (texture)\n", tex->handle);
svga_screen_surface_destroy(ss, &tex->key, &tex->handle);
- ss->total_resource_bytes -= tex->size;
+ ss->hud.total_resource_bytes -= tex->size;
FREE(tex->defined);
FREE(tex->rendered_to);
FREE(tex);
+
+ assert(ss->hud.num_resources > 0);
+ if (ss->hud.num_resources > 0)
+ ss->hud.num_resources--;
}
@@ -322,6 +327,8 @@ svga_texture_transfer_map(struct pipe_context *pipe,
boolean use_direct_map = svga_have_gb_objects(svga) &&
!svga_have_gb_dma(svga);
unsigned d;
+ void *returnVal;
+ int64_t begin = os_time_get();
/* We can't map texture storage directly unless we have GB objects */
if (usage & PIPE_TRANSFER_MAP_DIRECTLY) {
@@ -464,10 +471,10 @@ svga_texture_transfer_map(struct pipe_context *pipe,
* Begin mapping code
*/
if (st->swbuf) {
- return st->swbuf;
+ returnVal = st->swbuf;
}
else if (!st->use_direct_map) {
- return sws->buffer_map(sws, st->hwbuf, usage);
+ returnVal = sws->buffer_map(sws, st->hwbuf, usage);
}
else {
SVGA3dSize baseLevelSize;
@@ -518,9 +525,13 @@ svga_texture_transfer_map(struct pipe_context *pipe,
offset += svga3dsurface_get_pixel_offset(tex->key.format,
mip_width, mip_height,
xoffset, yoffset, zoffset);
-
- return (void *) (map + offset);
+ returnVal = (void *) (map + offset);
}
+
+ svga->hud.map_buffer_time += (os_time_get() - begin);
+ svga->hud.num_resources_mapped++;
+
+ return returnVal;
}
@@ -889,7 +900,8 @@ svga_texture_create(struct pipe_screen *screen,
(debug_reference_descriptor)debug_describe_resource, 0);
tex->size = util_resource_size(template);
- svgascreen->total_resource_bytes += tex->size;
+ svgascreen->hud.total_resource_bytes += tex->size;
+ svgascreen->hud.num_resources++;
return &tex->b.b;
}
@@ -901,6 +913,7 @@ svga_texture_from_handle(struct pipe_screen *screen,
struct winsys_handle *whandle)
{
struct svga_winsys_screen *sws = svga_winsys_screen(screen);
+ struct svga_screen *ss = svga_screen(screen);
struct svga_winsys_surface *srf;
struct svga_texture *tex;
enum SVGA3dSurfaceFormat format = 0;
@@ -970,5 +983,7 @@ svga_texture_from_handle(struct pipe_screen *screen,
tex->rendered_to = CALLOC(1, sizeof(tex->rendered_to[0]));
tex->imported = TRUE;
+ ss->hud.num_resources++;
+
return &tex->b.b;
}
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index e0a28788238..dab89814334 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -772,9 +772,22 @@ svga_get_driver_query_info(struct pipe_screen *screen,
struct pipe_driver_query_info *info)
{
static const struct pipe_driver_query_info queries[] = {
- {"draw-calls", SVGA_QUERY_DRAW_CALLS, {0}},
- {"fallbacks", SVGA_QUERY_FALLBACKS, {0}},
- {"memory-used", SVGA_QUERY_MEMORY_USED, {0}, PIPE_DRIVER_QUERY_TYPE_BYTES}
+ /* per-frame counters */
+ {"num-draw-calls", SVGA_QUERY_NUM_DRAW_CALLS, {0}},
+ {"num-fallbacks", SVGA_QUERY_NUM_FALLBACKS, {0}},
+ {"num-flushes", SVGA_QUERY_NUM_FLUSHES, {0}},
+ {"num-validations", SVGA_QUERY_NUM_VALIDATIONS, {0}},
+ {"map-buffer-time", SVGA_QUERY_MAP_BUFFER_TIME, {0},
+ PIPE_DRIVER_QUERY_TYPE_MICROSECONDS},
+ {"num-resources-mapped", SVGA_QUERY_NUM_RESOURCES_MAPPED, {0}},
+
+ /* running total counters */
+ {"memory-used", SVGA_QUERY_MEMORY_USED, {0},
+ PIPE_DRIVER_QUERY_TYPE_BYTES},
+ {"num-shaders", SVGA_QUERY_NUM_SHADERS, {0}},
+ {"num-resources", SVGA_QUERY_NUM_RESOURCES, {0}},
+ {"num-state-objects", SVGA_QUERY_NUM_STATE_OBJECTS, {0}},
+ {"num-surface-views", SVGA_QUERY_NUM_SURFACE_VIEWS, {0}},
};
if (!info)
diff --git a/src/gallium/drivers/svga/svga_screen.h b/src/gallium/drivers/svga/svga_screen.h
index 5581d2e1ffd..98b56b2a6d1 100644
--- a/src/gallium/drivers/svga/svga_screen.h
+++ b/src/gallium/drivers/svga/svga_screen.h
@@ -80,8 +80,12 @@ struct svga_screen
struct svga_host_surface_cache cache;
- /** Memory used by all resources (buffers and surfaces) */
- uint64_t total_resource_bytes;
+ /** HUD counters */
+ struct {
+ /** Memory used by all resources (buffers and surfaces) */
+ uint64_t total_resource_bytes;
+ uint64_t num_resources;
+ } hud;
};
#ifndef DEBUG
diff --git a/src/gallium/drivers/svga/svga_shader.c b/src/gallium/drivers/svga/svga_shader.c
index d46e7ebbc38..5c99e16d976 100644
--- a/src/gallium/drivers/svga/svga_shader.c
+++ b/src/gallium/drivers/svga/svga_shader.c
@@ -414,6 +414,14 @@ svga_set_shader(struct svga_context *svga,
}
+struct svga_shader_variant *
+svga_new_shader_variant(struct svga_context *svga)
+{
+ svga->hud.num_shaders++;
+ return CALLOC_STRUCT(svga_shader_variant);
+}
+
+
enum pipe_error
svga_destroy_shader_variant(struct svga_context *svga,
SVGA3dShaderType type,
@@ -455,6 +463,8 @@ svga_destroy_shader_variant(struct svga_context *svga,
FREE((unsigned *)variant->tokens);
FREE(variant);
+ svga->hud.num_shaders--;
+
return ret;
}
diff --git a/src/gallium/drivers/svga/svga_shader.h b/src/gallium/drivers/svga/svga_shader.h
index b0800c1ecad..efcac408626 100644
--- a/src/gallium/drivers/svga/svga_shader.h
+++ b/src/gallium/drivers/svga/svga_shader.h
@@ -273,6 +273,9 @@ svga_set_shader(struct svga_context *svga,
SVGA3dShaderType type,
struct svga_shader_variant *variant);
+struct svga_shader_variant *
+svga_new_shader_variant(struct svga_context *svga);
+
enum pipe_error
svga_destroy_shader_variant(struct svga_context *svga,
SVGA3dShaderType type,
diff --git a/src/gallium/drivers/svga/svga_state.c b/src/gallium/drivers/svga/svga_state.c
index 37d16dc9afe..722b369fd4b 100644
--- a/src/gallium/drivers/svga/svga_state.c
+++ b/src/gallium/drivers/svga/svga_state.c
@@ -225,6 +225,9 @@ svga_update_state(struct svga_context *svga, unsigned max_level)
svga->state.dirty[i] |= svga->dirty;
svga->dirty = 0;
+
+ svga->hud.num_validations++;
+
return PIPE_OK;
}
diff --git a/src/gallium/drivers/svga/svga_state_constants.c b/src/gallium/drivers/svga/svga_state_constants.c
index 75592d3bf8b..c93d2a5e565 100644
--- a/src/gallium/drivers/svga/svga_state_constants.c
+++ b/src/gallium/drivers/svga/svga_state_constants.c
@@ -718,7 +718,7 @@ emit_consts_vgpu10(struct svga_context *svga, unsigned shader)
/* round down to multiple of 16 (this may cause rendering problems
* but should avoid a device error).
*/
- size &= ~16;
+ size &= ~15;
}
}
diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c
index c244d5352d9..e392778c2fb 100644
--- a/src/gallium/drivers/svga/svga_state_fs.c
+++ b/src/gallium/drivers/svga/svga_state_fs.c
@@ -90,7 +90,8 @@ translate_fragment_program(struct svga_context *svga,
PIPE_SHADER_FRAGMENT);
}
else {
- return svga_tgsi_vgpu9_translate(&fs->base, key, PIPE_SHADER_FRAGMENT);
+ return svga_tgsi_vgpu9_translate(svga, &fs->base, key,
+ PIPE_SHADER_FRAGMENT);
}
}
diff --git a/src/gallium/drivers/svga/svga_state_gs.c b/src/gallium/drivers/svga/svga_state_gs.c
index 7f75410fb57..0b336baee86 100644
--- a/src/gallium/drivers/svga/svga_state_gs.c
+++ b/src/gallium/drivers/svga/svga_state_gs.c
@@ -53,13 +53,9 @@ translate_geometry_program(struct svga_context *svga,
const struct svga_geometry_shader *gs,
const struct svga_compile_key *key)
{
- if (svga_have_vgpu10(svga)) {
- return svga_tgsi_vgpu10_translate(svga, &gs->base, key,
- PIPE_SHADER_GEOMETRY);
- }
- else {
- return svga_tgsi_vgpu9_translate(&gs->base, key, PIPE_SHADER_GEOMETRY);
- }
+ assert(svga_have_vgpu10(svga));
+ return svga_tgsi_vgpu10_translate(svga, &gs->base, key,
+ PIPE_SHADER_GEOMETRY);
}
diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c
index a846b779e70..24574c1bf85 100644
--- a/src/gallium/drivers/svga/svga_state_vs.c
+++ b/src/gallium/drivers/svga/svga_state_vs.c
@@ -81,7 +81,8 @@ translate_vertex_program(struct svga_context *svga,
PIPE_SHADER_VERTEX);
}
else {
- return svga_tgsi_vgpu9_translate(&vs->base, key, PIPE_SHADER_VERTEX);
+ return svga_tgsi_vgpu9_translate(svga, &vs->base, key,
+ PIPE_SHADER_VERTEX);
}
}
diff --git a/src/gallium/drivers/svga/svga_surface.c b/src/gallium/drivers/svga/svga_surface.c
index aca5abcdfce..9f09311116e 100644
--- a/src/gallium/drivers/svga/svga_surface.c
+++ b/src/gallium/drivers/svga/svga_surface.c
@@ -317,6 +317,8 @@ svga_create_surface_view(struct pipe_context *pipe,
s->real_level = surf_tmpl->u.tex.level;
}
+ svga->hud.num_surface_views++;
+
return &s->base;
}
@@ -509,6 +511,8 @@ svga_surface_destroy(struct pipe_context *pipe,
pipe_resource_reference(&surf->texture, NULL);
FREE(surf);
+
+ svga->hud.num_surface_views--;
}
diff --git a/src/gallium/drivers/svga/svga_tgsi.c b/src/gallium/drivers/svga/svga_tgsi.c
index 9a6fb465ccb..202eee276b7 100644
--- a/src/gallium/drivers/svga/svga_tgsi.c
+++ b/src/gallium/drivers/svga/svga_tgsi.c
@@ -175,7 +175,8 @@ svga_shader_emit_header(struct svga_shader_emitter *emit)
* it is, it will be copied to a hardware buffer for upload.
*/
struct svga_shader_variant *
-svga_tgsi_vgpu9_translate(const struct svga_shader *shader,
+svga_tgsi_vgpu9_translate(struct svga_context *svga,
+ const struct svga_shader *shader,
const struct svga_compile_key *key, unsigned unit)
{
struct svga_shader_variant *variant = NULL;
@@ -227,7 +228,7 @@ svga_tgsi_vgpu9_translate(const struct svga_shader *shader,
goto fail;
}
- variant = CALLOC_STRUCT(svga_shader_variant);
+ variant = svga_new_shader_variant(svga);
if (variant == NULL)
goto fail;
diff --git a/src/gallium/drivers/svga/svga_tgsi.h b/src/gallium/drivers/svga/svga_tgsi.h
index 207a3f0a845..2581135701f 100644
--- a/src/gallium/drivers/svga/svga_tgsi.h
+++ b/src/gallium/drivers/svga/svga_tgsi.h
@@ -63,7 +63,8 @@ static inline void svga_generate_vdecl_semantics( unsigned idx,
struct svga_shader_variant *
-svga_tgsi_vgpu9_translate(const struct svga_shader *shader,
+svga_tgsi_vgpu9_translate(struct svga_context *svga,
+ const struct svga_shader *shader,
const struct svga_compile_key *key, unsigned unit);
struct svga_shader_variant *
diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
index e4f027b9567..d62f2bbcc96 100644
--- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
+++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
@@ -6735,7 +6735,7 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga,
/*
* Create, initialize the 'variant' object.
*/
- variant = CALLOC_STRUCT(svga_shader_variant);
+ variant = svga_new_shader_variant(svga);
if (!variant)
goto cleanup;
diff --git a/src/gallium/state_trackers/va/image.c b/src/gallium/state_trackers/va/image.c
index 022240df84f..b37a9714437 100644
--- a/src/gallium/state_trackers/va/image.c
+++ b/src/gallium/state_trackers/va/image.c
@@ -116,7 +116,7 @@ vlVaCreateImage(VADriverContextP ctx, VAImageFormat *format, int width, int heig
img->width = width;
img->height = height;
w = align(width, 2);
- h = align(width, 2);
+ h = align(height, 2);
switch (format->fourcc) {
case VA_FOURCC('N','V','1','2'):
@@ -240,9 +240,11 @@ vlVaGetImage(VADriverContextP ctx, VASurfaceID surface, int x, int y,
return VA_STATUS_ERROR_OPERATION_FAILED;
if (format != surf->buffer->buffer_format) {
- /* support NV12 to YV12 conversion now only */
- if (format == PIPE_FORMAT_YV12 &&
- surf->buffer->buffer_format == PIPE_FORMAT_NV12)
+ /* support NV12 to YV12 and IYUV conversion now only */
+ if ((format == PIPE_FORMAT_YV12 &&
+ surf->buffer->buffer_format == PIPE_FORMAT_NV12) ||
+ (format == PIPE_FORMAT_IYUV &&
+ surf->buffer->buffer_format == PIPE_FORMAT_NV12))
convert = true;
else
return VA_STATUS_ERROR_OPERATION_FAILED;
diff --git a/src/gallium/targets/d3dadapter9/Makefile.am b/src/gallium/targets/d3dadapter9/Makefile.am
index e26ca33a521..b5221472ef0 100644
--- a/src/gallium/targets/d3dadapter9/Makefile.am
+++ b/src/gallium/targets/d3dadapter9/Makefile.am
@@ -76,7 +76,6 @@ d3dadapter9_la_LIBADD = \
$(top_builddir)/src/gallium/auxiliary/libgalliumvl_stub.la \
$(top_builddir)/src/gallium/auxiliary/libgallium.la \
$(top_builddir)/src/glsl/libnir.la \
- $(top_builddir)/src/libglsl_util.la \
$(top_builddir)/src/gallium/state_trackers/nine/libninetracker.la \
$(top_builddir)/src/util/libmesautil.la \
$(top_builddir)/src/gallium/winsys/sw/wrapper/libwsw.la \
diff --git a/src/gallium/targets/pipe-loader/Makefile.am b/src/gallium/targets/pipe-loader/Makefile.am
index 4d9f7be2ec9..4f25b4f6073 100644
--- a/src/gallium/targets/pipe-loader/Makefile.am
+++ b/src/gallium/targets/pipe-loader/Makefile.am
@@ -53,7 +53,6 @@ endif
PIPE_LIBS += \
$(top_builddir)/src/gallium/auxiliary/libgallium.la \
$(top_builddir)/src/glsl/libnir.la \
- $(top_builddir)/src/libglsl_util.la \
$(top_builddir)/src/util/libmesautil.la \
$(top_builddir)/src/gallium/drivers/rbug/librbug.la \
$(top_builddir)/src/gallium/drivers/trace/libtrace.la \
diff --git a/src/gallium/targets/xa/Makefile.am b/src/gallium/targets/xa/Makefile.am
index 92173dedce3..02c42c665ed 100644
--- a/src/gallium/targets/xa/Makefile.am
+++ b/src/gallium/targets/xa/Makefile.am
@@ -38,7 +38,6 @@ libxatracker_la_LIBADD = \
$(top_builddir)/src/gallium/auxiliary/libgalliumvl_stub.la \
$(top_builddir)/src/gallium/auxiliary/libgallium.la \
$(top_builddir)/src/glsl/libnir.la \
- $(top_builddir)/src/libglsl_util.la \
$(top_builddir)/src/util/libmesautil.la \
$(LIBDRM_LIBS) \
$(GALLIUM_COMMON_LIB_DEPS)
diff --git a/src/glsl/Makefile.am b/src/glsl/Makefile.am
index 08368311b8a..8b0a73b250a 100644
--- a/src/glsl/Makefile.am
+++ b/src/glsl/Makefile.am
@@ -148,9 +148,6 @@ libglsl_la_SOURCES = \
libnir_la_SOURCES = \
- glsl_types.cpp \
- builtin_types.cpp \
- glsl_symbol_table.cpp \
$(NIR_FILES) \
$(NIR_GENERATED_FILES)
@@ -160,6 +157,7 @@ glsl_compiler_SOURCES = \
glsl_compiler_LDADD = \
libglsl.la \
$(top_builddir)/src/libglsl_util.la \
+ $(top_builddir)/src/util/libmesautil.la \
$(PTHREAD_LIBS)
spirv2nir_SOURCES = \
@@ -284,6 +282,5 @@ nir_tests_control_flow_tests_CFLAGS = \
nir_tests_control_flow_tests_LDADD = \
$(top_builddir)/src/gtest/libgtest.la \
$(top_builddir)/src/glsl/libnir.la \
- $(top_builddir)/src/libglsl_util.la \
$(top_builddir)/src/util/libmesautil.la \
$(PTHREAD_LIBS)
diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index 65a26268c2e..47dc628101d 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -20,6 +20,8 @@ NIR_GENERATED_FILES = \
NIR_FILES = \
nir/glsl_to_nir.cpp \
nir/glsl_to_nir.h \
+ nir/glsl_types.cpp \
+ nir/glsl_types.h \
nir/nir.c \
nir/nir.h \
nir/nir_array.h \
@@ -33,6 +35,8 @@ NIR_FILES = \
nir/nir_gs_count_vertices.c \
nir/nir_intrinsics.c \
nir/nir_intrinsics.h \
+ nir/nir_instr_set.c \
+ nir/nir_instr_set.h \
nir/nir_live_variables.c \
nir/nir_lower_alu_to_scalar.c \
nir/nir_lower_atomics.c \
@@ -81,6 +85,8 @@ NIR_FILES = \
nir/nir_worklist.c \
nir/nir_worklist.h \
nir/nir_types.cpp \
+ nir/shader_enums.h \
+ nir/shader_enums.c \
nir/spirv_to_nir.c \
nir/spirv_glsl450_to_nir.c
@@ -103,8 +109,6 @@ LIBGLSL_FILES = \
glsl_parser_extras.h \
glsl_symbol_table.cpp \
glsl_symbol_table.h \
- glsl_types.cpp \
- glsl_types.h \
hir_field_selection.cpp \
ir_basic_block.cpp \
ir_basic_block.h \
@@ -206,8 +210,7 @@ LIBGLSL_FILES = \
opt_vectorize.cpp \
program.h \
s_expression.cpp \
- s_expression.h \
- shader_enums.h
+ s_expression.h
# glsl_compiler
diff --git a/src/glsl/SConscript b/src/glsl/SConscript
index 89c603580a5..70bf5b09c3c 100644
--- a/src/glsl/SConscript
+++ b/src/glsl/SConscript
@@ -16,6 +16,7 @@ env.Prepend(CPPPATH = [
'#src/gallium/include',
'#src/gallium/auxiliary',
'#src/glsl',
+ '#src/glsl/nir',
'#src/glsl/glcpp',
])
@@ -60,6 +61,12 @@ source_lists = env.ParseSourceList('Makefile.sources')
for l in ('LIBGLCPP_FILES', 'LIBGLSL_FILES'):
glsl_sources += source_lists[l]
+# add nir/glsl_types.cpp manually, because SCons still doesn't know about NIR.
+# XXX: Remove this once we build NIR and NIR_FILES.
+glsl_sources += [
+ 'nir/glsl_types.cpp',
+]
+
if env['msvc']:
env.Prepend(CPPPATH = ['#/src/getopt'])
env.PrependUnique(LIBS = [getopt])
diff --git a/src/glsl/ast.h b/src/glsl/ast.h
index 4c314366133..e803e6d7675 100644
--- a/src/glsl/ast.h
+++ b/src/glsl/ast.h
@@ -62,6 +62,8 @@ public:
virtual ir_rvalue *hir(exec_list *instructions,
struct _mesa_glsl_parse_state *state);
+ virtual bool has_sequence_subexpression() const;
+
/**
* Retrieve the source location of an AST node
*
@@ -181,6 +183,7 @@ enum ast_operators {
ast_post_dec,
ast_field_selection,
ast_array_index,
+ ast_unsized_array_dim,
ast_function_call,
@@ -221,6 +224,8 @@ public:
virtual void hir_no_rvalue(exec_list *instructions,
struct _mesa_glsl_parse_state *state);
+ virtual bool has_sequence_subexpression() const;
+
ir_rvalue *do_hir(exec_list *instructions,
struct _mesa_glsl_parse_state *state,
bool needs_rvalue);
@@ -299,6 +304,8 @@ public:
virtual void hir_no_rvalue(exec_list *instructions,
struct _mesa_glsl_parse_state *state);
+ virtual bool has_sequence_subexpression() const;
+
private:
/**
* Is this function call actually a constructor?
@@ -318,16 +325,7 @@ public:
class ast_array_specifier : public ast_node {
public:
- /** Unsized array specifier ([]) */
- explicit ast_array_specifier(const struct YYLTYPE &locp)
- : is_unsized_array(true)
- {
- set_location(locp);
- }
-
- /** Sized array specifier ([dim]) */
ast_array_specifier(const struct YYLTYPE &locp, ast_expression *dim)
- : is_unsized_array(false)
{
set_location(locp);
array_dimensions.push_tail(&dim->link);
@@ -338,13 +336,16 @@ public:
array_dimensions.push_tail(&dim->link);
}
- virtual void print(void) const;
+ const bool is_single_dimension()
+ {
+ return this->array_dimensions.tail_pred->prev != NULL &&
+ this->array_dimensions.tail_pred->prev->is_head_sentinel();
+ }
- /* If true, this means that the array has an unsized outermost dimension. */
- bool is_unsized_array;
+ virtual void print(void) const;
/* This list contains objects of type ast_node containing the
- * sized dimensions only, in outermost-to-innermost order.
+ * array dimensions in outermost-to-innermost order.
*/
exec_list array_dimensions;
};
diff --git a/src/glsl/ast_array_index.cpp b/src/glsl/ast_array_index.cpp
index 5e8f49d70b0..74d403fdb65 100644
--- a/src/glsl/ast_array_index.cpp
+++ b/src/glsl/ast_array_index.cpp
@@ -28,13 +28,10 @@
void
ast_array_specifier::print(void) const
{
- if (this->is_unsized_array) {
- printf("[ ] ");
- }
-
foreach_list_typed (ast_node, array_dimension, link, &this->array_dimensions) {
printf("[ ");
- array_dimension->print();
+ if (((ast_expression*)array_dimension)->oper != ast_unsized_array_dim)
+ array_dimension->print();
printf("] ");
}
}
@@ -64,21 +61,29 @@ update_max_array_access(ir_rvalue *ir, int idx, YYLTYPE *loc,
}
} else if (ir_dereference_record *deref_record =
ir->as_dereference_record()) {
- /* There are two possibilities we need to consider:
+ /* There are three possibilities we need to consider:
*
* - Accessing an element of an array that is a member of a named
* interface block (e.g. ifc.foo[i])
*
* - Accessing an element of an array that is a member of a named
* interface block array (e.g. ifc[j].foo[i]).
+ *
+ * - Accessing an element of an array that is a member of a named
+ * interface block array of arrays (e.g. ifc[j][k].foo[i]).
*/
ir_dereference_variable *deref_var =
deref_record->record->as_dereference_variable();
if (deref_var == NULL) {
- if (ir_dereference_array *deref_array =
- deref_record->record->as_dereference_array()) {
- deref_var = deref_array->array->as_dereference_variable();
+ ir_dereference_array *deref_array =
+ deref_record->record->as_dereference_array();
+ ir_dereference_array *deref_array_prev = NULL;
+ while (deref_array != NULL) {
+ deref_array_prev = deref_array;
+ deref_array = deref_array->array->as_dereference_array();
}
+ if (deref_array_prev != NULL)
+ deref_var = deref_array_prev->array->as_dereference_variable();
}
if (deref_var != NULL) {
@@ -230,7 +235,7 @@ _mesa_ast_array_index_to_hir(void *mem_ctx,
ir_var_shader_storage) {
_mesa_glsl_error(&loc, state, "unsized array index must be constant");
}
- } else if (array->type->fields.array->is_interface()
+ } else if (array->type->without_array()->is_interface()
&& (array->variable_referenced()->data.mode == ir_var_uniform ||
array->variable_referenced()->data.mode == ir_var_shader_storage)
&& !state->is_version(400, 0) && !state->ARB_gpu_shader5_enable) {
diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp
index 26d4c62ce36..c5c5cae333b 100644
--- a/src/glsl/ast_function.cpp
+++ b/src/glsl/ast_function.cpp
@@ -437,13 +437,54 @@ generate_call(exec_list *instructions, ir_function_signature *sig,
}
}
- /* If the function call is a constant expression, don't generate any
- * instructions; just generate an ir_constant.
+ /* Section 4.3.2 (Const) of the GLSL 1.10.59 spec says:
+ *
+ * "Initializers for const declarations must be formed from literal
+ * values, other const variables (not including function call
+ * paramaters), or expressions of these.
+ *
+ * Constructors may be used in such expressions, but function calls may
+ * not."
+ *
+ * Section 4.3.3 (Constant Expressions) of the GLSL 1.20.8 spec says:
+ *
+ * "A constant expression is one of
+ *
+ * ...
+ *
+ * - a built-in function call whose arguments are all constant
+ * expressions, with the exception of the texture lookup
+ * functions, the noise functions, and ftransform. The built-in
+ * functions dFdx, dFdy, and fwidth must return 0 when evaluated
+ * inside an initializer with an argument that is a constant
+ * expression."
+ *
+ * Section 5.10 (Constant Expressions) of the GLSL ES 1.00.17 spec says:
+ *
+ * "A constant expression is one of
*
- * Function calls were first allowed to be constant expressions in GLSL
- * 1.20 and GLSL ES 3.00.
+ * ...
+ *
+ * - a built-in function call whose arguments are all constant
+ * expressions, with the exception of the texture lookup
+ * functions."
+ *
+ * Section 4.3.3 (Constant Expressions) of the GLSL ES 3.00.4 spec says:
+ *
+ * "A constant expression is one of
+ *
+ * ...
+ *
+ * - a built-in function call whose arguments are all constant
+ * expressions, with the exception of the texture lookup
+ * functions. The built-in functions dFdx, dFdy, and fwidth must
+ * return 0 when evaluated inside an initializer with an argument
+ * that is a constant expression."
+ *
+ * If the function call is a constant expression, don't generate any
+ * instructions; just generate an ir_constant.
*/
- if (state->is_version(120, 300)) {
+ if (state->is_version(120, 100)) {
ir_constant *value = sig->constant_expression_value(actual_parameters, NULL);
if (value != NULL) {
return value;
@@ -950,6 +991,7 @@ process_array_constructor(exec_list *instructions,
}
bool all_parameters_are_constant = true;
+ const glsl_type *element_type = constructor_type->fields.array;
/* Type cast each parameter and, if possible, fold constants. */
foreach_in_list_safe(ir_rvalue, ir, &actual_parameters) {
@@ -976,12 +1018,34 @@ process_array_constructor(exec_list *instructions,
}
}
- if (result->type != constructor_type->fields.array) {
+ if (constructor_type->fields.array->is_unsized_array()) {
+ /* As the inner parameters of the constructor are created without
+ * knowledge of each other we need to check to make sure unsized
+ * parameters of unsized constructors all end up with the same size.
+ *
+ * e.g we make sure to fail for a constructor like this:
+ * vec4[][] a = vec4[][](vec4[](vec4(0.0), vec4(1.0)),
+ * vec4[](vec4(0.0), vec4(1.0), vec4(1.0)),
+ * vec4[](vec4(0.0), vec4(1.0)));
+ */
+ if (element_type->is_unsized_array()) {
+ /* This is the first parameter so just get the type */
+ element_type = result->type;
+ } else if (element_type != result->type) {
+ _mesa_glsl_error(loc, state, "type error in array constructor: "
+ "expected: %s, found %s",
+ element_type->name,
+ result->type->name);
+ return ir_rvalue::error_value(ctx);
+ }
+ } else if (result->type != constructor_type->fields.array) {
_mesa_glsl_error(loc, state, "type error in array constructor: "
"expected: %s, found %s",
constructor_type->fields.array->name,
result->type->name);
return ir_rvalue::error_value(ctx);
+ } else {
+ element_type = result->type;
}
/* Attempt to convert the parameter to a constant valued expression.
@@ -998,6 +1062,14 @@ process_array_constructor(exec_list *instructions,
ir->replace_with(result);
}
+ if (constructor_type->fields.array->is_unsized_array()) {
+ constructor_type =
+ glsl_type::get_array_instance(element_type,
+ parameter_count);
+ assert(constructor_type != NULL);
+ assert(constructor_type->length == parameter_count);
+ }
+
if (all_parameters_are_constant)
return new(ctx) ir_constant(constructor_type, &actual_parameters);
@@ -1958,6 +2030,17 @@ ast_function_expression::hir(exec_list *instructions,
unreachable("not reached");
}
+bool
+ast_function_expression::has_sequence_subexpression() const
+{
+ foreach_list_typed(const ast_node, ast, link, &this->expressions) {
+ if (ast->has_sequence_subexpression())
+ return true;
+ }
+
+ return false;
+}
+
ir_rvalue *
ast_aggregate_initializer::hir(exec_list *instructions,
struct _mesa_glsl_parse_state *state)
diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index f38ca84d129..0c11ec58d20 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -782,8 +782,30 @@ validate_assignment(struct _mesa_glsl_parse_state *state,
* Note: Whole-array assignments are not permitted in GLSL 1.10, but this
* is handled by ir_dereference::is_lvalue.
*/
- if (lhs->type->is_unsized_array() && rhs->type->is_array()
- && (lhs->type->fields.array == rhs->type->fields.array)) {
+ const glsl_type *lhs_t = lhs->type;
+ const glsl_type *rhs_t = rhs->type;
+ bool unsized_array = false;
+ while(lhs_t->is_array()) {
+ if (rhs_t == lhs_t)
+ break; /* the rest of the inner arrays match so break out early */
+ if (!rhs_t->is_array()) {
+ unsized_array = false;
+ break; /* number of dimensions mismatch */
+ }
+ if (lhs_t->length == rhs_t->length) {
+ lhs_t = lhs_t->fields.array;
+ rhs_t = rhs_t->fields.array;
+ continue;
+ } else if (lhs_t->is_unsized_array()) {
+ unsized_array = true;
+ } else {
+ unsized_array = false;
+ break; /* sized array mismatch */
+ }
+ lhs_t = lhs_t->fields.array;
+ rhs_t = rhs_t->fields.array;
+ }
+ if (unsized_array) {
if (is_initializer) {
return rhs;
} else {
@@ -1004,6 +1026,12 @@ ast_node::hir(exec_list *instructions, struct _mesa_glsl_parse_state *state)
return NULL;
}
+bool
+ast_node::has_sequence_subexpression() const
+{
+ return false;
+}
+
void
ast_function_expression::hir_no_rvalue(exec_list *instructions,
struct _mesa_glsl_parse_state *state)
@@ -1805,6 +1833,10 @@ ast_expression::do_hir(exec_list *instructions,
break;
}
+ case ast_unsized_array_dim:
+ assert(!"ast_unsized_array_dim: Should never get here.");
+ break;
+
case ast_function_call:
/* Should *NEVER* get here. ast_function_call should always be handled
* by ast_function_expression::hir.
@@ -1916,6 +1948,83 @@ ast_expression::do_hir(exec_list *instructions,
return result;
}
+bool
+ast_expression::has_sequence_subexpression() const
+{
+ switch (this->oper) {
+ case ast_plus:
+ case ast_neg:
+ case ast_bit_not:
+ case ast_logic_not:
+ case ast_pre_inc:
+ case ast_pre_dec:
+ case ast_post_inc:
+ case ast_post_dec:
+ return this->subexpressions[0]->has_sequence_subexpression();
+
+ case ast_assign:
+ case ast_add:
+ case ast_sub:
+ case ast_mul:
+ case ast_div:
+ case ast_mod:
+ case ast_lshift:
+ case ast_rshift:
+ case ast_less:
+ case ast_greater:
+ case ast_lequal:
+ case ast_gequal:
+ case ast_nequal:
+ case ast_equal:
+ case ast_bit_and:
+ case ast_bit_xor:
+ case ast_bit_or:
+ case ast_logic_and:
+ case ast_logic_or:
+ case ast_logic_xor:
+ case ast_array_index:
+ case ast_mul_assign:
+ case ast_div_assign:
+ case ast_add_assign:
+ case ast_sub_assign:
+ case ast_mod_assign:
+ case ast_ls_assign:
+ case ast_rs_assign:
+ case ast_and_assign:
+ case ast_xor_assign:
+ case ast_or_assign:
+ return this->subexpressions[0]->has_sequence_subexpression() ||
+ this->subexpressions[1]->has_sequence_subexpression();
+
+ case ast_conditional:
+ return this->subexpressions[0]->has_sequence_subexpression() ||
+ this->subexpressions[1]->has_sequence_subexpression() ||
+ this->subexpressions[2]->has_sequence_subexpression();
+
+ case ast_sequence:
+ return true;
+
+ case ast_field_selection:
+ case ast_identifier:
+ case ast_int_constant:
+ case ast_uint_constant:
+ case ast_float_constant:
+ case ast_bool_constant:
+ case ast_double_constant:
+ return false;
+
+ case ast_aggregate:
+ unreachable("ast_aggregate: Should never get here.");
+
+ case ast_function_call:
+ unreachable("should be handled by ast_function_expression::hir");
+
+ case ast_unsized_array_dim:
+ unreachable("ast_unsized_array_dim: Should never get here.");
+ }
+
+ return false;
+}
ir_rvalue *
ast_expression_statement::hir(exec_list *instructions,
@@ -1968,6 +2077,14 @@ process_array_size(exec_node *node,
exec_list dummy_instructions;
ast_node *array_size = exec_node_data(ast_node, node, link);
+
+ /**
+ * Dimensions other than the outermost dimension can by unsized if they
+ * are immediately sized by a constructor or initializer.
+ */
+ if (((ast_expression*)array_size)->oper == ast_unsized_array_dim)
+ return 0;
+
ir_rvalue *const ir = array_size->hir(& dummy_instructions, state);
YYLTYPE loc = array_size->get_location();
@@ -1990,7 +2107,7 @@ process_array_size(exec_node *node,
}
ir_constant *const size = ir->constant_expression_value();
- if (size == NULL) {
+ if (size == NULL || array_size->has_sequence_subexpression()) {
_mesa_glsl_error(& loc, state, "array size must be a "
"constant valued expression");
return 0;
@@ -2028,20 +2145,7 @@ process_array_type(YYLTYPE *loc, const glsl_type *base,
*
* "Only one-dimensional arrays may be declared."
*/
- if (!state->ARB_arrays_of_arrays_enable) {
- _mesa_glsl_error(loc, state,
- "invalid array of `%s'"
- "GL_ARB_arrays_of_arrays "
- "required for defining arrays of arrays",
- base->name);
- return glsl_type::error_type;
- }
-
- if (base->length == 0) {
- _mesa_glsl_error(loc, state,
- "only the outermost array dimension can "
- "be unsized",
- base->name);
+ if (!state->check_arrays_of_arrays_allowed(loc)) {
return glsl_type::error_type;
}
}
@@ -2051,9 +2155,6 @@ process_array_type(YYLTYPE *loc, const glsl_type *base,
unsigned array_size = process_array_size(node, state);
array_type = glsl_type::get_array_instance(array_type, array_size);
}
-
- if (array_specifier->is_unsized_array)
- array_type = glsl_type::get_array_instance(array_type, 0);
}
return array_type;
@@ -2592,6 +2693,25 @@ is_conflicting_fragcoord_redeclaration(struct _mesa_glsl_parse_state *state,
return false;
}
+static inline void
+validate_array_dimensions(const glsl_type *t,
+ struct _mesa_glsl_parse_state *state,
+ YYLTYPE *loc) {
+ if (t->is_array()) {
+ t = t->fields.array;
+ while (t->is_array()) {
+ if (t->is_unsized_array()) {
+ _mesa_glsl_error(loc, state,
+ "only the outermost array dimension can "
+ "be unsized",
+ t->name);
+ break;
+ }
+ t = t->fields.array;
+ }
+ }
+}
+
static void
apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
ir_variable *var,
@@ -3171,7 +3291,8 @@ process_initializer(ir_variable *var, ast_declaration *decl,
*/
if (var->data.mode == ir_var_uniform) {
state->check_version(120, 0, &initializer_loc,
- "cannot initialize uniforms");
+ "cannot initialize uniform %s",
+ var->name);
}
/* Section 4.3.7 "Buffer Variables" of the GLSL 4.30 spec:
@@ -3179,8 +3300,9 @@ process_initializer(ir_variable *var, ast_declaration *decl,
* "Buffer variables cannot have initializers."
*/
if (var->data.mode == ir_var_shader_storage) {
- _mesa_glsl_error(& initializer_loc, state,
- "SSBO variables cannot have initializers");
+ _mesa_glsl_error(&initializer_loc, state,
+ "cannot initialize buffer variable %s",
+ var->name);
}
/* From section 4.1.7 of the GLSL 4.40 spec:
@@ -3190,16 +3312,25 @@ process_initializer(ir_variable *var, ast_declaration *decl,
* shader."
*/
if (var->type->contains_opaque()) {
- _mesa_glsl_error(& initializer_loc, state,
- "cannot initialize opaque variable");
+ _mesa_glsl_error(&initializer_loc, state,
+ "cannot initialize opaque variable %s",
+ var->name);
}
if ((var->data.mode == ir_var_shader_in) && (state->current_function == NULL)) {
- _mesa_glsl_error(& initializer_loc, state,
- "cannot initialize %s shader input / %s",
- _mesa_shader_stage_to_string(state->stage),
- (state->stage == MESA_SHADER_VERTEX)
- ? "attribute" : "varying");
+ _mesa_glsl_error(&initializer_loc, state,
+ "cannot initialize %s shader input / %s %s",
+ _mesa_shader_stage_to_string(state->stage),
+ (state->stage == MESA_SHADER_VERTEX)
+ ? "attribute" : "varying",
+ var->name);
+ }
+
+ if (var->data.mode == ir_var_shader_out && state->current_function == NULL) {
+ _mesa_glsl_error(&initializer_loc, state,
+ "cannot initialize %s shader output %s",
+ _mesa_shader_stage_to_string(state->stage),
+ var->name);
}
/* If the initializer is an ast_aggregate_initializer, recursively store
@@ -3214,16 +3345,72 @@ process_initializer(ir_variable *var, ast_declaration *decl,
/* Calculate the constant value if this is a const or uniform
* declaration.
+ *
+ * Section 4.3 (Storage Qualifiers) of the GLSL ES 1.00.17 spec says:
+ *
+ * "Declarations of globals without a storage qualifier, or with
+ * just the const qualifier, may include initializers, in which case
+ * they will be initialized before the first line of main() is
+ * executed. Such initializers must be a constant expression."
+ *
+ * The same section of the GLSL ES 3.00.4 spec has similar language.
*/
if (type->qualifier.flags.q.constant
- || type->qualifier.flags.q.uniform) {
+ || type->qualifier.flags.q.uniform
+ || (state->es_shader && state->current_function == NULL)) {
ir_rvalue *new_rhs = validate_assignment(state, initializer_loc,
lhs, rhs, true);
if (new_rhs != NULL) {
rhs = new_rhs;
+ /* Section 4.3.3 (Constant Expressions) of the GLSL ES 3.00.4 spec
+ * says:
+ *
+ * "A constant expression is one of
+ *
+ * ...
+ *
+ * - an expression formed by an operator on operands that are
+ * all constant expressions, including getting an element of
+ * a constant array, or a field of a constant structure, or
+ * components of a constant vector. However, the sequence
+ * operator ( , ) and the assignment operators ( =, +=, ...)
+ * are not included in the operators that can create a
+ * constant expression."
+ *
+ * Section 12.43 (Sequence operator and constant expressions) says:
+ *
+ * "Should the following construct be allowed?
+ *
+ * float a[2,3];
+ *
+ * The expression within the brackets uses the sequence operator
+ * (',') and returns the integer 3 so the construct is declaring
+ * a single-dimensional array of size 3. In some languages, the
+ * construct declares a two-dimensional array. It would be
+ * preferable to make this construct illegal to avoid confusion.
+ *
+ * One possibility is to change the definition of the sequence
+ * operator so that it does not return a constant-expression and
+ * hence cannot be used to declare an array size.
+ *
+ * RESOLUTION: The result of a sequence operator is not a
+ * constant-expression."
+ *
+ * Section 4.3.3 (Constant Expressions) of the GLSL 4.30.9 spec
+ * contains language almost identical to the section 4.3.3 in the
+ * GLSL ES 3.00.4 spec. This is a new limitation for these GLSL
+ * versions.
+ */
ir_constant *constant_value = rhs->constant_expression_value();
- if (!constant_value) {
+ if (!constant_value ||
+ (state->is_version(430, 300) &&
+ decl->initializer->has_sequence_subexpression())) {
+ const char *const variable_mode =
+ (type->qualifier.flags.q.constant)
+ ? "const"
+ : ((type->qualifier.flags.q.uniform) ? "uniform" : "global");
+
/* If ARB_shading_language_420pack is enabled, initializers of
* const-qualified local variables do not have to be constant
* expressions. Const-qualified global variables must still be
@@ -3234,22 +3421,24 @@ process_initializer(ir_variable *var, ast_declaration *decl,
_mesa_glsl_error(& initializer_loc, state,
"initializer of %s variable `%s' must be a "
"constant expression",
- (type->qualifier.flags.q.constant)
- ? "const" : "uniform",
+ variable_mode,
decl->identifier);
if (var->type->is_numeric()) {
/* Reduce cascading errors. */
- var->constant_value = ir_constant::zero(state, var->type);
+ var->constant_value = type->qualifier.flags.q.constant
+ ? ir_constant::zero(state, var->type) : NULL;
}
}
} else {
rhs = constant_value;
- var->constant_value = constant_value;
+ var->constant_value = type->qualifier.flags.q.constant
+ ? constant_value : NULL;
}
} else {
if (var->type->is_numeric()) {
/* Reduce cascading errors. */
- var->constant_value = ir_constant::zero(state, var->type);
+ var->constant_value = type->qualifier.flags.q.constant
+ ? ir_constant::zero(state, var->type) : NULL;
}
}
}
@@ -4265,6 +4454,8 @@ ast_declarator_list::hir(exec_list *instructions,
result = process_initializer((earlier == NULL) ? var : earlier,
decl, this->type,
&initializer_instructions, state);
+ } else {
+ validate_array_dimensions(var_type, state, &loc);
}
/* From page 23 (page 29 of the PDF) of the GLSL 1.10 spec:
@@ -5790,6 +5981,7 @@ ast_process_structure_or_interface_block(exec_list *instructions,
const struct glsl_type *field_type =
process_array_type(&loc, decl_type, decl->array_specifier, state);
+ validate_array_dimensions(field_type, state, &loc);
fields[i].type = field_type;
fields[i].name = decl->identifier;
fields[i].location = -1;
@@ -6142,7 +6334,8 @@ ast_interface_block::hir(exec_list *instructions,
_mesa_shader_stage_to_string(state->stage));
}
if (this->instance_name == NULL ||
- strcmp(this->instance_name, "gl_in") != 0 || this->array_specifier == NULL) {
+ strcmp(this->instance_name, "gl_in") != 0 || this->array_specifier == NULL ||
+ !this->array_specifier->is_single_dimension()) {
_mesa_glsl_error(&loc, state,
"gl_PerVertex input must be redeclared as "
"gl_in[]");
@@ -6305,6 +6498,9 @@ ast_interface_block::hir(exec_list *instructions,
ir_variable *var;
if (this->array_specifier != NULL) {
+ const glsl_type *block_array_type =
+ process_array_type(&loc, block_type, this->array_specifier, state);
+
/* Section 4.3.7 (Interface Blocks) of the GLSL 1.50 spec says:
*
* For uniform blocks declared an array, each individual array
@@ -6328,7 +6524,7 @@ ast_interface_block::hir(exec_list *instructions,
* tessellation control shader output, and tessellation evaluation
* shader input.
*/
- if (this->array_specifier->is_unsized_array) {
+ if (block_array_type->is_unsized_array()) {
bool allow_inputs = state->stage == MESA_SHADER_GEOMETRY ||
state->stage == MESA_SHADER_TESS_CTRL ||
state->stage == MESA_SHADER_TESS_EVAL;
@@ -6355,9 +6551,6 @@ ast_interface_block::hir(exec_list *instructions,
}
}
- const glsl_type *block_array_type =
- process_array_type(&loc, block_type, this->array_specifier, state);
-
/* From section 4.3.9 (Interface Blocks) of the GLSL ES 3.10 spec:
*
* * Arrays of arrays of blocks are not allowed
diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index f0f6be21b7d..aae25f893e8 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -403,7 +403,7 @@ shader_atomic_counters(const _mesa_glsl_parse_state *state)
static bool
shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
{
- return state->ARB_shader_storage_buffer_object_enable;
+ return state->has_shader_storage_buffer_objects();
}
static bool
diff --git a/src/glsl/builtin_types.cpp b/src/glsl/builtin_types.cpp
index 0aedbb3546a..bbdcd199e92 100644
--- a/src/glsl/builtin_types.cpp
+++ b/src/glsl/builtin_types.cpp
@@ -43,9 +43,7 @@
* convenience pointers (glsl_type::foo_type).
* @{
*/
-#define DECL_TYPE(NAME, ...) \
- const glsl_type glsl_type::_##NAME##_type = glsl_type(__VA_ARGS__, #NAME); \
- const glsl_type *const glsl_type::NAME##_type = &glsl_type::_##NAME##_type;
+#define DECL_TYPE(NAME, ...)
#define STRUCT_TYPE(NAME) \
const glsl_type glsl_type::_struct_##NAME##_type = \
diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index c1bcccc34f4..cd00f6e085b 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -1962,7 +1962,9 @@ array_specifier:
'[' ']'
{
void *ctx = state;
- $$ = new(ctx) ast_array_specifier(@1);
+ $$ = new(ctx) ast_array_specifier(@1, new(ctx) ast_expression(
+ ast_unsized_array_dim, NULL,
+ NULL, NULL));
$$->set_location_range(@1, @2);
}
| '[' constant_expression ']'
@@ -1973,29 +1975,21 @@ array_specifier:
}
| array_specifier '[' ']'
{
+ void *ctx = state;
$$ = $1;
- if (!state->ARB_arrays_of_arrays_enable) {
- _mesa_glsl_error(& @1, state,
- "GL_ARB_arrays_of_arrays "
- "required for defining arrays of arrays");
- } else {
- _mesa_glsl_error(& @1, state,
- "only the outermost array dimension can "
- "be unsized");
+ if (state->check_arrays_of_arrays_allowed(& @1)) {
+ $$->add_dimension(new(ctx) ast_expression(ast_unsized_array_dim, NULL,
+ NULL, NULL));
}
}
| array_specifier '[' constant_expression ']'
{
$$ = $1;
- if (!state->ARB_arrays_of_arrays_enable) {
- _mesa_glsl_error(& @1, state,
- "GL_ARB_arrays_of_arrays "
- "required for defining arrays of arrays");
+ if (state->check_arrays_of_arrays_allowed(& @1)) {
+ $$->add_dimension($3);
}
-
- $$->add_dimension($3);
}
;
diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h
index 7fee43ece52..e8740f9ecb9 100644
--- a/src/glsl/glsl_parser_extras.h
+++ b/src/glsl/glsl_parser_extras.h
@@ -115,6 +115,20 @@ struct _mesa_glsl_parse_state {
unsigned required_glsl_es_version,
YYLTYPE *locp, const char *fmt, ...) PRINTFLIKE(5, 6);
+ bool check_arrays_of_arrays_allowed(YYLTYPE *locp)
+ {
+ if (!(ARB_arrays_of_arrays_enable || is_version(430, 310))) {
+ const char *const requirement = this->es_shader
+ ? "GLSL ES 3.10"
+ : "GL_ARB_arrays_of_arrays or GLSL 4.30";
+ _mesa_glsl_error(locp, this,
+ "%s required for defining arrays of arrays.",
+ requirement);
+ return false;
+ }
+ return true;
+ }
+
bool check_precision_qualifiers_allowed(YYLTYPE *locp)
{
return check_version(130, 100, locp,
diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index 2c45b9edc0f..8933b230177 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -662,6 +662,22 @@ ir_expression::get_operator(const char *str)
return (ir_expression_operation) -1;
}
+ir_variable *
+ir_expression::variable_referenced() const
+{
+ switch (operation) {
+ case ir_binop_vector_extract:
+ case ir_triop_vector_insert:
+ /* We get these for things like a[0] where a is a vector type. In these
+ * cases we want variable_referenced() to return the actual vector
+ * variable this is wrapping.
+ */
+ return operands[0]->variable_referenced();
+ default:
+ return ir_rvalue::variable_referenced();
+ }
+}
+
ir_constant::ir_constant()
: ir_rvalue(ir_type_constant)
{
@@ -1673,8 +1689,8 @@ ir_variable::ir_variable(const struct glsl_type *type, const char *name,
if (type->is_interface())
this->init_interface_type(type);
- else if (type->is_array() && type->fields.array->is_interface())
- this->init_interface_type(type->fields.array);
+ else if (type->without_array()->is_interface())
+ this->init_interface_type(type->without_array());
}
}
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 43a2bf0ae1c..9c9f22d018b 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -1731,6 +1731,8 @@ public:
virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+ virtual ir_variable *variable_referenced() const;
+
ir_expression_operation operation;
ir_rvalue *operands[4];
};
diff --git a/src/glsl/ir_constant_expression.cpp b/src/glsl/ir_constant_expression.cpp
index 309b6b72b5b..67ed3605a8c 100644
--- a/src/glsl/ir_constant_expression.cpp
+++ b/src/glsl/ir_constant_expression.cpp
@@ -36,6 +36,7 @@
#include <math.h>
#include "main/core.h" /* for MAX2, MIN2, CLAMP */
#include "util/rounding.h" /* for _mesa_roundeven */
+#include "util/half_float.h"
#include "ir.h"
#include "glsl_types.h"
#include "program/hash_table.h"
diff --git a/src/glsl/ir_set_program_inouts.cpp b/src/glsl/ir_set_program_inouts.cpp
index b7a0f6e95ba..d7c29b00f88 100644
--- a/src/glsl/ir_set_program_inouts.cpp
+++ b/src/glsl/ir_set_program_inouts.cpp
@@ -242,6 +242,12 @@ ir_set_program_inouts_visitor::try_mark_partial_variable(ir_variable *var,
type = type->fields.array;
}
+ /* TODO: implement proper arrays of arrays support
+ * for now let the caller mark whole variable as used.
+ */
+ if (type->is_array() && type->fields.array->is_array())
+ return false;
+
/* The code below only handles:
*
* - Indexing into matrices
diff --git a/src/glsl/ir_uniform.h b/src/glsl/ir_uniform.h
index 50fe76b7ea2..1854279925b 100644
--- a/src/glsl/ir_uniform.h
+++ b/src/glsl/ir_uniform.h
@@ -162,6 +162,22 @@ struct gl_uniform_storage {
/** @} */
/**
+ * This is a compiler-generated uniform that should not be advertised
+ * via the API.
+ */
+ bool hidden;
+
+ /**
+ * This is a built-in uniform that should not be modified through any gl API.
+ */
+ bool builtin;
+
+ /**
+ * This is a shader storage buffer variable, not an uniform.
+ */
+ bool is_shader_storage;
+
+ /**
* Index within gl_shader_program::AtomicBuffers[] of the atomic
* counter buffer this uniform is stored in, or -1 if this is not
* an atomic counter.
@@ -181,20 +197,16 @@ struct gl_uniform_storage {
unsigned num_compatible_subroutines;
/**
- * This is a compiler-generated uniform that should not be advertised
- * via the API.
+ * A single integer identifying the number of active array elements of
+ * the top-level shader storage block member (GL_TOP_LEVEL_ARRAY_SIZE).
*/
- bool hidden;
+ unsigned top_level_array_size;
/**
- * This is a built-in uniform that should not be modified through any gl API.
+ * A single integer identifying the stride between array elements of the
+ * top-level shader storage block member. (GL_TOP_LEVEL_ARRAY_STRIDE).
*/
- bool builtin;
-
- /**
- * This is a shader storage buffer variable, not an uniform.
- */
- bool is_shader_storage;
+ unsigned top_level_array_stride;
};
#ifdef __cplusplus
diff --git a/src/glsl/ir_variable_refcount.cpp b/src/glsl/ir_variable_refcount.cpp
index e4d825c454b..790627bd1e3 100644
--- a/src/glsl/ir_variable_refcount.cpp
+++ b/src/glsl/ir_variable_refcount.cpp
@@ -46,6 +46,15 @@ static void
free_entry(struct hash_entry *entry)
{
ir_variable_refcount_entry *ivre = (ir_variable_refcount_entry *) entry->data;
+
+ /* Free assignment list */
+ exec_node *n;
+ while ((n = ivre->assign_list.pop_head()) != NULL) {
+ struct assignment_entry *assignment_entry =
+ exec_node_data(struct assignment_entry, n, link);
+ free(assignment_entry);
+ }
+
delete ivre;
}
@@ -59,7 +68,6 @@ ir_variable_refcount_visitor::~ir_variable_refcount_visitor()
ir_variable_refcount_entry::ir_variable_refcount_entry(ir_variable *var)
{
this->var = var;
- assign = NULL;
assigned_count = 0;
declaration = false;
referenced_count = 0;
@@ -125,8 +133,20 @@ ir_variable_refcount_visitor::visit_leave(ir_assignment *ir)
entry = this->get_variable_entry(ir->lhs->variable_referenced());
if (entry) {
entry->assigned_count++;
- if (entry->assign == NULL)
- entry->assign = ir;
+
+ /* Build a list for dead code optimisation. Don't add assignment if it
+ * was declared out of scope (outside the instruction stream). Also don't
+ * bother adding any more to the list if there are more references than
+ * assignments as this means the variable is used and won't be optimised
+ * out.
+ */
+ assert(entry->referenced_count >= entry->assigned_count);
+ if (entry->referenced_count == entry->assigned_count) {
+ struct assignment_entry *assignment_entry =
+ (struct assignment_entry *)calloc(1, sizeof(*assignment_entry));
+ assignment_entry->assign = ir;
+ entry->assign_list.push_head(&assignment_entry->link);
+ }
}
return visit_continue;
diff --git a/src/glsl/ir_variable_refcount.h b/src/glsl/ir_variable_refcount.h
index c15e8110d04..5c74c314781 100644
--- a/src/glsl/ir_variable_refcount.h
+++ b/src/glsl/ir_variable_refcount.h
@@ -33,13 +33,24 @@
#include "ir_visitor.h"
#include "glsl_types.h"
+struct assignment_entry {
+ exec_node link;
+ ir_assignment *assign;
+};
+
class ir_variable_refcount_entry
{
public:
ir_variable_refcount_entry(ir_variable *var);
ir_variable *var; /* The key: the variable's pointer. */
- ir_assignment *assign; /* An assignment to the variable, if any */
+
+ /**
+ * List of assignments to the variable, if any.
+ * This is intended to be used for dead code optimisation and may
+ * not be a complete list.
+ */
+ exec_list assign_list;
/** Number of times the variable is referenced, including assignments. */
unsigned referenced_count;
diff --git a/src/glsl/link_atomics.cpp b/src/glsl/link_atomics.cpp
index 100d03c4e8f..70ef0e1c891 100644
--- a/src/glsl/link_atomics.cpp
+++ b/src/glsl/link_atomics.cpp
@@ -33,7 +33,7 @@ namespace {
* Atomic counter as seen by the program.
*/
struct active_atomic_counter {
- unsigned id;
+ unsigned uniform_loc;
ir_variable *var;
};
@@ -52,7 +52,7 @@ namespace {
free(counters);
}
- void push_back(unsigned id, ir_variable *var)
+ void push_back(unsigned uniform_loc, ir_variable *var)
{
active_atomic_counter *new_counters;
@@ -66,7 +66,7 @@ namespace {
}
counters = new_counters;
- counters[num_counters].id = id;
+ counters[num_counters].uniform_loc = uniform_loc;
counters[num_counters].var = var;
num_counters++;
}
@@ -95,6 +95,50 @@ namespace {
y->data.atomic.offset < x->data.atomic.offset + x->type->atomic_size()));
}
+ void
+ process_atomic_variable(const glsl_type *t, struct gl_shader_program *prog,
+ unsigned *uniform_loc, ir_variable *var,
+ active_atomic_buffer *const buffers,
+ unsigned *num_buffers, int *offset,
+ const unsigned shader_stage)
+ {
+ /* FIXME: Arrays of arrays get counted separately. For example:
+ * x1[3][3][2] = 9 counters
+ * x2[3][2] = 3 counters
+ * x3[2] = 1 counter
+ *
+ * However this code marks all the counters as active even when they
+ * might not be used.
+ */
+ if (t->is_array() && t->fields.array->is_array()) {
+ for (unsigned i = 0; i < t->length; i++) {
+ process_atomic_variable(t->fields.array, prog, uniform_loc,
+ var, buffers, num_buffers, offset,
+ shader_stage);
+ }
+ } else {
+ active_atomic_buffer *buf = &buffers[var->data.binding];
+ gl_uniform_storage *const storage =
+ &prog->UniformStorage[*uniform_loc];
+
+ /* If this is the first time the buffer is used, increment
+ * the counter of buffers used.
+ */
+ if (buf->size == 0)
+ (*num_buffers)++;
+
+ buf->push_back(*uniform_loc, var);
+
+ buf->stage_references[shader_stage]++;
+ buf->size = MAX2(buf->size, *offset + t->atomic_size());
+
+ storage->offset = *offset;
+ *offset += t->atomic_size();
+
+ (*uniform_loc)++;
+ }
+ }
+
active_atomic_buffer *
find_active_atomic_counters(struct gl_context *ctx,
struct gl_shader_program *prog,
@@ -114,23 +158,10 @@ namespace {
ir_variable *var = node->as_variable();
if (var && var->type->contains_atomic()) {
- unsigned id = 0;
- bool found = prog->UniformHash->get(id, var->name);
- assert(found);
- (void) found;
- active_atomic_buffer *buf = &buffers[var->data.binding];
-
- /* If this is the first time the buffer is used, increment
- * the counter of buffers used.
- */
- if (buf->size == 0)
- (*num_buffers)++;
-
- buf->push_back(id, var);
-
- buf->stage_references[i]++;
- buf->size = MAX2(buf->size, var->data.atomic.offset +
- var->type->atomic_size());
+ int offset = var->data.atomic.offset;
+ unsigned uniform_loc = var->data.location;
+ process_atomic_variable(var->type, prog, &uniform_loc,
+ var, buffers, num_buffers, &offset, i);
}
}
}
@@ -197,10 +228,10 @@ link_assign_atomic_counter_resources(struct gl_context *ctx,
/* Assign counter-specific fields. */
for (unsigned j = 0; j < ab.num_counters; j++) {
ir_variable *const var = ab.counters[j].var;
- const unsigned id = ab.counters[j].id;
- gl_uniform_storage *const storage = &prog->UniformStorage[id];
+ gl_uniform_storage *const storage =
+ &prog->UniformStorage[ab.counters[j].uniform_loc];
- mab.Uniforms[j] = id;
+ mab.Uniforms[j] = ab.counters[j].uniform_loc;
if (!var->data.explicit_binding)
var->data.binding = i;
diff --git a/src/glsl/link_uniform_block_active_visitor.cpp b/src/glsl/link_uniform_block_active_visitor.cpp
index bcf17fef758..422739af063 100644
--- a/src/glsl/link_uniform_block_active_visitor.cpp
+++ b/src/glsl/link_uniform_block_active_visitor.cpp
@@ -71,6 +71,88 @@ process_block(void *mem_ctx, struct hash_table *ht, ir_variable *var)
return NULL;
}
+/* For arrays of arrays this function will give us a middle ground between
+ * detecting inactive uniform blocks and structuring them in a way that makes
+ * it easy to calculate the offset for indirect indexing.
+ *
+ * For example given the shader:
+ *
+ * uniform ArraysOfArraysBlock
+ * {
+ * vec4 a;
+ * } i[3][4][5];
+ *
+ * void main()
+ * {
+ * vec4 b = i[0][1][1].a;
+ * gl_Position = i[2][2][3].a + b;
+ * }
+ *
+ * There are only 2 active blocks above but for the sake of indirect indexing
+ * and not over complicating the code we will end up with a count of 8.
+ * Here each dimension has 2 different indices counted so we end up with 2*2*2
+ */
+struct uniform_block_array_elements **
+process_arrays(void *mem_ctx, ir_dereference_array *ir,
+ struct link_uniform_block_active *block)
+{
+ if (ir) {
+ struct uniform_block_array_elements **ub_array_ptr =
+ process_arrays(mem_ctx, ir->array->as_dereference_array(), block);
+ if (*ub_array_ptr == NULL) {
+ *ub_array_ptr = rzalloc(mem_ctx, struct uniform_block_array_elements);
+ (*ub_array_ptr)->ir = ir;
+ }
+
+ struct uniform_block_array_elements *ub_array = *ub_array_ptr;
+ ir_constant *c = ir->array_index->as_constant();
+ if (c) {
+ /* Index is a constant, so mark just that element used,
+ * if not already.
+ */
+ const unsigned idx = c->get_uint_component(0);
+
+ unsigned i;
+ for (i = 0; i < ub_array->num_array_elements; i++) {
+ if (ub_array->array_elements[i] == idx)
+ break;
+ }
+
+ assert(i <= ub_array->num_array_elements);
+
+ if (i == ub_array->num_array_elements) {
+ ub_array->array_elements = reralloc(mem_ctx,
+ ub_array->array_elements,
+ unsigned,
+ ub_array->num_array_elements + 1);
+
+ ub_array->array_elements[ub_array->num_array_elements] = idx;
+
+ ub_array->num_array_elements++;
+ }
+ } else {
+ /* The array index is not a constant,
+ * so mark the entire array used.
+ */
+ assert(ir->array->type->is_array());
+ if (ub_array->num_array_elements < ir->array->type->length) {
+ ub_array->num_array_elements = ir->array->type->length;
+ ub_array->array_elements = reralloc(mem_ctx,
+ ub_array->array_elements,
+ unsigned,
+ ub_array->num_array_elements);
+
+ for (unsigned i = 0; i < ub_array->num_array_elements; i++) {
+ ub_array->array_elements[i] = i;
+ }
+ }
+ }
+ return &ub_array->array;
+ } else {
+ return &block->array;
+ }
+}
+
ir_visitor_status
link_uniform_block_active_visitor::visit(ir_variable *var)
{
@@ -101,24 +183,30 @@ link_uniform_block_active_visitor::visit(ir_variable *var)
return visit_stop;
}
- assert(b->num_array_elements == 0);
- assert(b->array_elements == NULL);
+ assert(b->array == NULL);
assert(b->type != NULL);
assert(!b->type->is_array() || b->has_instance_name);
/* For uniform block arrays declared with a shared or std140 layout
* qualifier, mark all its instances as used.
*/
- if (b->type->is_array() && b->type->length > 0) {
- b->num_array_elements = b->type->length;
- b->array_elements = reralloc(this->mem_ctx,
- b->array_elements,
- unsigned,
- b->num_array_elements);
-
- for (unsigned i = 0; i < b->num_array_elements; i++) {
- b->array_elements[i] = i;
+ const glsl_type *type = b->type;
+ struct uniform_block_array_elements **ub_array = &b->array;
+ while (type->is_array()) {
+ assert(b->type->length > 0);
+
+ *ub_array = rzalloc(this->mem_ctx, struct uniform_block_array_elements);
+ (*ub_array)->num_array_elements = type->length;
+ (*ub_array)->array_elements = reralloc(this->mem_ctx,
+ (*ub_array)->array_elements,
+ unsigned,
+ (*ub_array)->num_array_elements);
+
+ for (unsigned i = 0; i < (*ub_array)->num_array_elements; i++) {
+ (*ub_array)->array_elements[i] = i;
}
+ ub_array = &(*ub_array)->array;
+ type = type->fields.array;
}
return visit_continue;
@@ -127,7 +215,13 @@ link_uniform_block_active_visitor::visit(ir_variable *var)
ir_visitor_status
link_uniform_block_active_visitor::visit_enter(ir_dereference_array *ir)
{
- ir_dereference_variable *const d = ir->array->as_dereference_variable();
+ /* cycle through arrays of arrays */
+ ir_dereference_array *base_ir = ir;
+ while (base_ir->array->ir_type == ir_type_dereference_array)
+ base_ir = base_ir->array->as_dereference_array();
+
+ ir_dereference_variable *const d =
+ base_ir->array->as_dereference_variable();
ir_variable *const var = (d == NULL) ? NULL : d->var;
/* If the r-value being dereferenced is not a variable (e.g., a field of a
@@ -158,55 +252,16 @@ link_uniform_block_active_visitor::visit_enter(ir_dereference_array *ir)
/* Block arrays must be declared with an instance name.
*/
assert(b->has_instance_name);
- assert((b->num_array_elements == 0) == (b->array_elements == NULL));
assert(b->type != NULL);
/* If the block array was declared with a shared or
* std140 layout qualifier, all its instances have been already marked
* as used in link_uniform_block_active_visitor::visit(ir_variable *).
*/
- if (var->get_interface_type()->interface_packing !=
- GLSL_INTERFACE_PACKING_PACKED)
- return visit_continue_with_parent;
-
- ir_constant *c = ir->array_index->as_constant();
-
- if (c) {
- /* Index is a constant, so mark just that element used, if not already */
- const unsigned idx = c->get_uint_component(0);
-
- unsigned i;
- for (i = 0; i < b->num_array_elements; i++) {
- if (b->array_elements[i] == idx)
- break;
- }
-
- assert(i <= b->num_array_elements);
-
- if (i == b->num_array_elements) {
- b->array_elements = reralloc(this->mem_ctx,
- b->array_elements,
- unsigned,
- b->num_array_elements + 1);
-
- b->array_elements[b->num_array_elements] = idx;
-
- b->num_array_elements++;
- }
- } else {
- /* The array index is not a constant, so mark the entire array used. */
- assert(b->type->is_array());
- if (b->num_array_elements < b->type->length) {
- b->num_array_elements = b->type->length;
- b->array_elements = reralloc(this->mem_ctx,
- b->array_elements,
- unsigned,
- b->num_array_elements);
-
- for (unsigned i = 0; i < b->num_array_elements; i++) {
- b->array_elements[i] = i;
- }
- }
+ if (var->get_interface_type()->interface_packing ==
+ GLSL_INTERFACE_PACKING_PACKED) {
+ b->var = var;
+ process_arrays(this->mem_ctx, ir, b);
}
return visit_continue_with_parent;
@@ -234,8 +289,7 @@ link_uniform_block_active_visitor::visit(ir_dereference_variable *ir)
return visit_stop;
}
- assert(b->num_array_elements == 0);
- assert(b->array_elements == NULL);
+ assert(b->array == NULL);
assert(b->type != NULL);
return visit_continue;
diff --git a/src/glsl/link_uniform_block_active_visitor.h b/src/glsl/link_uniform_block_active_visitor.h
index b663a884db4..afb52c14a37 100644
--- a/src/glsl/link_uniform_block_active_visitor.h
+++ b/src/glsl/link_uniform_block_active_visitor.h
@@ -28,11 +28,20 @@
#include "ir.h"
#include "util/hash_table.h"
+struct uniform_block_array_elements {
+ unsigned *array_elements;
+ unsigned num_array_elements;
+
+ ir_dereference_array *ir;
+
+ struct uniform_block_array_elements *array;
+};
+
struct link_uniform_block_active {
const glsl_type *type;
+ ir_variable *var;
- unsigned *array_elements;
- unsigned num_array_elements;
+ struct uniform_block_array_elements *array;
unsigned binding;
diff --git a/src/glsl/link_uniform_blocks.cpp b/src/glsl/link_uniform_blocks.cpp
index 7ceffee799e..5285d8d01e4 100644
--- a/src/glsl/link_uniform_blocks.cpp
+++ b/src/glsl/link_uniform_blocks.cpp
@@ -116,7 +116,7 @@ private:
char *open_bracket = strchr(v->IndexName, '[');
assert(open_bracket != NULL);
- char *close_bracket = strchr(open_bracket, ']');
+ char *close_bracket = strchr(open_bracket, '.') - 1;
assert(close_bracket != NULL);
/* Length of the tail without the ']' but with the NUL.
@@ -185,6 +185,91 @@ struct block {
bool has_instance_name;
};
+static void
+process_block_array(struct uniform_block_array_elements *ub_array, char **name,
+ size_t name_length, gl_uniform_block *blocks,
+ ubo_visitor *parcel, gl_uniform_buffer_variable *variables,
+ const struct link_uniform_block_active *const b,
+ unsigned *block_index, unsigned *binding_offset,
+ struct gl_context *ctx, struct gl_shader_program *prog)
+{
+ if (ub_array) {
+ for (unsigned j = 0; j < ub_array->num_array_elements; j++) {
+ size_t new_length = name_length;
+
+ /* Append the subscript to the current variable name */
+ ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]",
+ ub_array->array_elements[j]);
+
+ process_block_array(ub_array->array, name, new_length, blocks,
+ parcel, variables, b, block_index,
+ binding_offset, ctx, prog);
+ }
+ } else {
+ unsigned i = *block_index;
+ const glsl_type *type = b->type->without_array();
+
+ blocks[i].Name = ralloc_strdup(blocks, *name);
+ blocks[i].Uniforms = &variables[(*parcel).index];
+
+ /* The GL_ARB_shading_language_420pack spec says:
+ *
+ * "If the binding identifier is used with a uniform block
+ * instanced as an array then the first element of the array
+ * takes the specified block binding and each subsequent
+ * element takes the next consecutive uniform block binding
+ * point."
+ */
+ blocks[i].Binding = (b->has_binding) ? b->binding + *binding_offset : 0;
+
+ blocks[i].UniformBufferSize = 0;
+ blocks[i]._Packing = gl_uniform_block_packing(type->interface_packing);
+
+ parcel->process(type, blocks[i].Name);
+
+ blocks[i].UniformBufferSize = parcel->buffer_size;
+
+ /* Check SSBO size is lower than maximum supported size for SSBO */
+ if (b->is_shader_storage &&
+ parcel->buffer_size > ctx->Const.MaxShaderStorageBlockSize) {
+ linker_error(prog, "shader storage block `%s' has size %d, "
+ "which is larger than than the maximum allowed (%d)",
+ b->type->name,
+ parcel->buffer_size,
+ ctx->Const.MaxShaderStorageBlockSize);
+ }
+ blocks[i].NumUniforms =
+ (unsigned)(ptrdiff_t)(&variables[parcel->index] - blocks[i].Uniforms);
+ blocks[i].IsShaderStorage = b->is_shader_storage;
+
+ *block_index = *block_index + 1;
+ *binding_offset = *binding_offset + 1;
+ }
+}
+
+/* This function resizes the array types of the block so that later we can use
+ * this new size to correctly calculate the offest for indirect indexing.
+ */
+const glsl_type *
+resize_block_array(const glsl_type *type,
+ struct uniform_block_array_elements *ub_array)
+{
+ if (type->is_array()) {
+ struct uniform_block_array_elements *child_array =
+ type->fields.array->is_array() ? ub_array->array : NULL;
+ const glsl_type *new_child_type =
+ resize_block_array(type->fields.array, child_array);
+
+ const glsl_type *new_type =
+ glsl_type::get_array_instance(new_child_type,
+ ub_array->num_array_elements);
+ ub_array->ir->array->type = new_type;
+ return new_type;
+ } else {
+ return type;
+ }
+}
+
unsigned
link_uniform_blocks(void *mem_ctx,
struct gl_context *ctx,
@@ -223,21 +308,25 @@ link_uniform_blocks(void *mem_ctx,
struct hash_entry *entry;
hash_table_foreach (block_hash, entry) {
- const struct link_uniform_block_active *const b =
- (const struct link_uniform_block_active *) entry->data;
+ struct link_uniform_block_active *const b =
+ (struct link_uniform_block_active *) entry->data;
- const glsl_type *const block_type =
- b->type->is_array() ? b->type->fields.array : b->type;
+ assert((b->array != NULL) == b->type->is_array());
- assert((b->num_array_elements > 0) == b->type->is_array());
+ if (b->array != NULL &&
+ (b->type->without_array()->interface_packing ==
+ GLSL_INTERFACE_PACKING_PACKED)) {
+ b->type = resize_block_array(b->type, b->array);
+ b->var->type = b->type;
+ }
block_size.num_active_uniforms = 0;
- block_size.process(block_type, "");
+ block_size.process(b->type->without_array(), "");
- if (b->num_array_elements > 0) {
- num_blocks += b->num_array_elements;
- num_variables += b->num_array_elements
- * block_size.num_active_uniforms;
+ if (b->array != NULL) {
+ unsigned aoa_size = b->type->arrays_of_arrays_size();
+ num_blocks += aoa_size;
+ num_variables += aoa_size * block_size.num_active_uniforms;
} else {
num_blocks++;
num_variables += block_size.num_active_uniforms;
@@ -281,50 +370,15 @@ link_uniform_blocks(void *mem_ctx,
(const struct link_uniform_block_active *) entry->data;
const glsl_type *block_type = b->type;
- if (b->num_array_elements > 0) {
- const char *const name = block_type->fields.array->name;
+ if (b->array != NULL) {
+ unsigned binding_offset = 0;
+ char *name = ralloc_strdup(NULL, block_type->without_array()->name);
+ size_t name_length = strlen(name);
assert(b->has_instance_name);
- for (unsigned j = 0; j < b->num_array_elements; j++) {
- blocks[i].Name = ralloc_asprintf(blocks, "%s[%u]", name,
- b->array_elements[j]);
- blocks[i].Uniforms = &variables[parcel.index];
-
- /* The GL_ARB_shading_language_420pack spec says:
- *
- * "If the binding identifier is used with a uniform block
- * instanced as an array then the first element of the array
- * takes the specified block binding and each subsequent
- * element takes the next consecutive uniform block binding
- * point."
- */
- blocks[i].Binding = (b->has_binding) ? b->binding + j : 0;
-
- blocks[i].UniformBufferSize = 0;
- blocks[i]._Packing =
- gl_uniform_block_packing(block_type->interface_packing);
-
- parcel.process(block_type->fields.array,
- blocks[i].Name);
-
- blocks[i].UniformBufferSize = parcel.buffer_size;
-
- /* Check SSBO size is lower than maximum supported size for SSBO */
- if (b->is_shader_storage &&
- parcel.buffer_size > ctx->Const.MaxShaderStorageBlockSize) {
- linker_error(prog, "shader storage block `%s' has size %d, "
- "which is larger than than the maximum allowed (%d)",
- block_type->name,
- parcel.buffer_size,
- ctx->Const.MaxShaderStorageBlockSize);
- }
- blocks[i].NumUniforms =
- (unsigned)(ptrdiff_t)(&variables[parcel.index] - blocks[i].Uniforms);
-
- blocks[i].IsShaderStorage = b->is_shader_storage;
-
- i++;
- }
+ process_block_array(b->array, &name, name_length, blocks, &parcel,
+ variables, b, &i, &binding_offset, ctx, prog);
+ ralloc_free(name);
} else {
blocks[i].Name = ralloc_strdup(blocks, block_type->name);
blocks[i].Uniforms = &variables[parcel.index];
diff --git a/src/glsl/link_uniform_initializers.cpp b/src/glsl/link_uniform_initializers.cpp
index e9e108a2765..35b9f9c6017 100644
--- a/src/glsl/link_uniform_initializers.cpp
+++ b/src/glsl/link_uniform_initializers.cpp
@@ -49,7 +49,7 @@ get_uniform_block_index(const gl_shader_program *shProg,
const char *uniformBlockName)
{
for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
- if (!strcmp(shProg->UniformBlocks[i].Name, uniformBlockName))
+ if (!strcmp(shProg->BufferInterfaceBlocks[i].Name, uniformBlockName))
return i;
}
@@ -107,51 +107,64 @@ copy_constant_to_storage(union gl_constant_value *storage,
* they have no storage and should be handled elsewhere.
*/
void
-set_opaque_binding(gl_shader_program *prog, const char *name, int binding)
+set_opaque_binding(void *mem_ctx, gl_shader_program *prog,
+ const glsl_type *type, const char *name, int *binding)
{
- struct gl_uniform_storage *const storage =
- get_storage(prog->UniformStorage, prog->NumUniformStorage, name);
- if (storage == NULL) {
- assert(storage != NULL);
- return;
- }
+ if (type->is_array() && type->fields.array->is_array()) {
+ const glsl_type *const element_type = type->fields.array;
- const unsigned elements = MAX2(storage->array_elements, 1);
+ for (unsigned int i = 0; i < type->length; i++) {
+ const char *element_name = ralloc_asprintf(mem_ctx, "%s[%d]", name, i);
- /* Section 4.4.4 (Opaque-Uniform Layout Qualifiers) of the GLSL 4.20 spec
- * says:
- *
- * "If the binding identifier is used with an array, the first element
- * of the array takes the specified unit and each subsequent element
- * takes the next consecutive unit."
- */
- for (unsigned int i = 0; i < elements; i++) {
- storage->storage[i].i = binding + i;
- }
+ set_opaque_binding(mem_ctx, prog, element_type,
+ element_name, binding);
+ }
+ } else {
+ struct gl_uniform_storage *const storage =
+ get_storage(prog->UniformStorage, prog->NumUniformStorage, name);
- for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) {
- gl_shader *shader = prog->_LinkedShaders[sh];
+ if (storage == NULL) {
+ assert(storage != NULL);
+ return;
+ }
- if (shader) {
- if (storage->type->base_type == GLSL_TYPE_SAMPLER &&
- storage->opaque[sh].active) {
- for (unsigned i = 0; i < elements; i++) {
- const unsigned index = storage->opaque[sh].index + i;
- shader->SamplerUnits[index] = storage->storage[i].i;
- }
+ const unsigned elements = MAX2(storage->array_elements, 1);
+
+ /* Section 4.4.4 (Opaque-Uniform Layout Qualifiers) of the GLSL 4.20 spec
+ * says:
+ *
+ * "If the binding identifier is used with an array, the first element
+ * of the array takes the specified unit and each subsequent element
+ * takes the next consecutive unit."
+ */
+ for (unsigned int i = 0; i < elements; i++) {
+ storage->storage[i].i = (*binding)++;
+ }
+
+ for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) {
+ gl_shader *shader = prog->_LinkedShaders[sh];
- } else if (storage->type->base_type == GLSL_TYPE_IMAGE &&
+ if (shader) {
+ if (storage->type->base_type == GLSL_TYPE_SAMPLER &&
+ storage->opaque[sh].active) {
+ for (unsigned i = 0; i < elements; i++) {
+ const unsigned index = storage->opaque[sh].index + i;
+ shader->SamplerUnits[index] = storage->storage[i].i;
+ }
+
+ } else if (storage->type->base_type == GLSL_TYPE_IMAGE &&
storage->opaque[sh].active) {
- for (unsigned i = 0; i < elements; i++) {
- const unsigned index = storage->opaque[sh].index + i;
- shader->ImageUnits[index] = storage->storage[i].i;
+ for (unsigned i = 0; i < elements; i++) {
+ const unsigned index = storage->opaque[sh].index + i;
+ shader->ImageUnits[index] = storage->storage[i].i;
+ }
}
}
}
- }
- storage->initialized = true;
+ storage->initialized = true;
+ }
}
void
@@ -170,7 +183,7 @@ set_block_binding(gl_shader_program *prog, const char *block_name, int binding)
if (stage_index != -1) {
struct gl_shader *sh = prog->_LinkedShaders[i];
- sh->UniformBlocks[stage_index].Binding = binding;
+ sh->BufferInterfaceBlocks[stage_index].Binding = binding;
}
}
}
@@ -180,6 +193,7 @@ set_uniform_initializer(void *mem_ctx, gl_shader_program *prog,
const char *name, const glsl_type *type,
ir_constant *val, unsigned int boolean_true)
{
+ const glsl_type *t_without_array = type->without_array();
if (type->is_record()) {
ir_constant *field_constant;
@@ -194,7 +208,8 @@ set_uniform_initializer(void *mem_ctx, gl_shader_program *prog,
field_constant = (ir_constant *)field_constant->next;
}
return;
- } else if (type->is_array() && type->fields.array->is_record()) {
+ } else if (t_without_array->is_record() ||
+ (type->is_array() && type->fields.array->is_array())) {
const glsl_type *const element_type = type->fields.array;
for (unsigned int i = 0; i < type->length; i++) {
@@ -284,7 +299,9 @@ link_set_uniform_initializers(struct gl_shader_program *prog,
if (type->without_array()->is_sampler() ||
type->without_array()->is_image()) {
- linker::set_opaque_binding(prog, var->name, var->data.binding);
+ int binding = var->data.binding;
+ linker::set_opaque_binding(mem_ctx, prog, var->type,
+ var->name, &binding);
} else if (var->is_in_buffer_block()) {
const glsl_type *const iface_type = var->get_interface_type();
@@ -327,9 +344,9 @@ link_set_uniform_initializers(struct gl_shader_program *prog,
} else {
assert(!"Explicit binding not on a sampler, UBO or atomic.");
}
- } else if (var->constant_value) {
+ } else if (var->constant_initializer) {
linker::set_uniform_initializer(mem_ctx, prog, var->name,
- var->type, var->constant_value,
+ var->type, var->constant_initializer,
boolean_true);
}
}
diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp
index 0ccd9c8c865..fe00aa30d07 100644
--- a/src/glsl/link_uniforms.cpp
+++ b/src/glsl/link_uniforms.cpp
@@ -149,7 +149,8 @@ program_resource_visitor::process(ir_variable *var)
recursion(var->type, &name, strlen(name), row_major, NULL, packing,
false, record_array_count);
ralloc_free(name);
- } else if (t->without_array()->is_record()) {
+ } else if (t_without_array->is_record() ||
+ (t->is_array() && t->fields.array->is_array())) {
char *name = ralloc_strdup(NULL, var->name);
recursion(var->type, &name, strlen(name), row_major, NULL, packing,
false, record_array_count);
@@ -160,6 +161,7 @@ program_resource_visitor::process(ir_variable *var)
false, record_array_count);
ralloc_free(name);
} else {
+ this->set_record_array_count(record_array_count);
this->visit_field(t, var->name, row_major, NULL, packing, false);
}
}
@@ -231,7 +233,8 @@ program_resource_visitor::recursion(const glsl_type *t, char **name,
this->leave_record(t, *name, row_major, packing);
}
} else if (t->without_array()->is_record() ||
- t->without_array()->is_interface()) {
+ t->without_array()->is_interface() ||
+ (t->is_array() && t->fields.array->is_array())) {
if (record_type == NULL && t->fields.array->is_record())
record_type = t->fields.array;
@@ -387,6 +390,7 @@ private:
{
assert(!type->without_array()->is_record());
assert(!type->without_array()->is_interface());
+ assert(!(type->is_array() && type->fields.array->is_array()));
(void) row_major;
@@ -502,9 +506,9 @@ public:
for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) {
if (strncmp(var->get_interface_type()->name,
- prog->UniformBlocks[i].Name,
+ prog->BufferInterfaceBlocks[i].Name,
l) == 0
- && prog->UniformBlocks[i].Name[l] == '[') {
+ && prog->BufferInterfaceBlocks[i].Name[l] == '[') {
ubo_block_index = i;
break;
}
@@ -512,7 +516,7 @@ public:
} else {
for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) {
if (strcmp(var->get_interface_type()->name,
- prog->UniformBlocks[i].Name) == 0) {
+ prog->BufferInterfaceBlocks[i].Name) == 0) {
ubo_block_index = i;
break;
}
@@ -530,7 +534,7 @@ public:
ubo_byte_offset = 0;
} else {
const struct gl_uniform_block *const block =
- &prog->UniformBlocks[ubo_block_index];
+ &prog->BufferInterfaceBlocks[ubo_block_index];
assert(var->data.location != -1);
@@ -712,6 +716,7 @@ private:
{
assert(!type->without_array()->is_record());
assert(!type->without_array()->is_interface());
+ assert(!(type->is_array() && type->fields.array->is_array()));
unsigned id;
bool found = this->map->get(id, name);
@@ -804,10 +809,11 @@ private:
if (type->is_array()) {
if (packing == GLSL_INTERFACE_PACKING_STD430)
this->uniforms[id].array_stride =
- type->fields.array->std430_array_stride(row_major);
+ type->without_array()->std430_array_stride(row_major);
else
this->uniforms[id].array_stride =
- glsl_align(type->fields.array->std140_size(row_major), 16);
+ glsl_align(type->without_array()->std140_size(row_major),
+ 16);
} else {
this->uniforms[id].array_stride = 0;
}
@@ -966,15 +972,16 @@ link_update_uniform_buffer_variables(struct gl_shader *shader)
if (var->type->is_record()) {
sentinel = '.';
- } else if (var->type->without_array()->is_record()) {
+ } else if (var->type->is_array() && (var->type->fields.array->is_array()
+ || var->type->without_array()->is_record())) {
sentinel = '[';
}
const unsigned l = strlen(var->name);
- for (unsigned i = 0; i < shader->NumUniformBlocks; i++) {
- for (unsigned j = 0; j < shader->UniformBlocks[i].NumUniforms; j++) {
+ for (unsigned i = 0; i < shader->NumBufferInterfaceBlocks; i++) {
+ for (unsigned j = 0; j < shader->BufferInterfaceBlocks[i].NumUniforms; j++) {
if (sentinel) {
- const char *begin = shader->UniformBlocks[i].Uniforms[j].Name;
+ const char *begin = shader->BufferInterfaceBlocks[i].Uniforms[j].Name;
const char *end = strchr(begin, sentinel);
if (end == NULL)
@@ -989,7 +996,7 @@ link_update_uniform_buffer_variables(struct gl_shader *shader)
break;
}
} else if (!strcmp(var->name,
- shader->UniformBlocks[i].Uniforms[j].Name)) {
+ shader->BufferInterfaceBlocks[i].Uniforms[j].Name)) {
found = true;
var->data.location = j;
break;
@@ -1115,10 +1122,10 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
sh->num_uniform_components = uniform_size.num_shader_uniform_components;
sh->num_combined_uniform_components = sh->num_uniform_components;
- for (unsigned i = 0; i < sh->NumUniformBlocks; i++) {
- if (!sh->UniformBlocks[i].IsShaderStorage) {
+ for (unsigned i = 0; i < sh->NumBufferInterfaceBlocks; i++) {
+ if (!sh->BufferInterfaceBlocks[i].IsShaderStorage) {
sh->num_combined_uniform_components +=
- sh->UniformBlocks[i].UniformBufferSize / 4;
+ sh->BufferInterfaceBlocks[i].UniformBufferSize / 4;
}
}
}
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index a97b4ef0a32..25ca928aa43 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -65,6 +65,7 @@
*/
#include <ctype.h>
+#include "util/strndup.h"
#include "main/core.h"
#include "glsl_symbol_table.h"
#include "glsl_parser_extras.h"
@@ -1161,7 +1162,7 @@ cross_validate_uniforms(struct gl_shader_program *prog)
}
/**
- * Accumulates the array of prog->UniformBlocks and checks that all
+ * Accumulates the array of prog->BufferInterfaceBlocks and checks that all
* definitons of blocks agree on their contents.
*/
static bool
@@ -1170,7 +1171,7 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog)
unsigned max_num_uniform_blocks = 0;
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
if (prog->_LinkedShaders[i])
- max_num_uniform_blocks += prog->_LinkedShaders[i]->NumUniformBlocks;
+ max_num_uniform_blocks += prog->_LinkedShaders[i]->NumBufferInterfaceBlocks;
}
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
@@ -1184,15 +1185,15 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog)
if (sh == NULL)
continue;
- for (unsigned int j = 0; j < sh->NumUniformBlocks; j++) {
+ for (unsigned int j = 0; j < sh->NumBufferInterfaceBlocks; j++) {
int index = link_cross_validate_uniform_block(prog,
- &prog->UniformBlocks,
+ &prog->BufferInterfaceBlocks,
&prog->NumBufferInterfaceBlocks,
- &sh->UniformBlocks[j]);
+ &sh->BufferInterfaceBlocks[j]);
if (index == -1) {
linker_error(prog, "uniform block `%s' has mismatching definitions\n",
- sh->UniformBlocks[j].Name);
+ sh->BufferInterfaceBlocks[j].Name);
return false;
}
@@ -1386,8 +1387,10 @@ public:
virtual ir_visitor_status visit(ir_variable *var)
{
+ const glsl_type *type_without_array;
fixup_type(&var->type, var->data.max_array_access,
var->data.from_ssbo_unsized_array);
+ type_without_array = var->type->without_array();
if (var->type->is_interface()) {
if (interface_contains_unsized_arrays(var->type)) {
const glsl_type *new_type =
@@ -1397,11 +1400,10 @@ public:
var->type = new_type;
var->change_interface_type(new_type);
}
- } else if (var->type->is_array() &&
- var->type->fields.array->is_interface()) {
- if (interface_contains_unsized_arrays(var->type->fields.array)) {
+ } else if (type_without_array->is_interface()) {
+ if (interface_contains_unsized_arrays(type_without_array)) {
const glsl_type *new_type =
- resize_interface_members(var->type->fields.array,
+ resize_interface_members(type_without_array,
var->get_max_ifc_array_access(),
var->is_in_shader_storage_block());
var->change_interface_type(new_type);
@@ -2064,9 +2066,9 @@ link_intrastage_shaders(void *mem_ctx,
linked->ir = new(linked) exec_list;
clone_ir_list(mem_ctx, linked->ir, main->ir);
- linked->UniformBlocks = uniform_blocks;
- linked->NumUniformBlocks = num_uniform_blocks;
- ralloc_steal(linked, linked->UniformBlocks);
+ linked->BufferInterfaceBlocks = uniform_blocks;
+ linked->NumBufferInterfaceBlocks = num_uniform_blocks;
+ ralloc_steal(linked, linked->BufferInterfaceBlocks);
link_fs_input_layout_qualifiers(prog, linked, shader_list, num_shaders);
link_tcs_out_layout_qualifiers(prog, linked, shader_list, num_shaders);
@@ -2804,19 +2806,19 @@ check_resources(struct gl_context *ctx, struct gl_shader_program *prog)
for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) {
/* Don't check SSBOs for Uniform Block Size */
- if (!prog->UniformBlocks[i].IsShaderStorage &&
- prog->UniformBlocks[i].UniformBufferSize > ctx->Const.MaxUniformBlockSize) {
+ if (!prog->BufferInterfaceBlocks[i].IsShaderStorage &&
+ prog->BufferInterfaceBlocks[i].UniformBufferSize > ctx->Const.MaxUniformBlockSize) {
linker_error(prog, "Uniform block %s too big (%d/%d)\n",
- prog->UniformBlocks[i].Name,
- prog->UniformBlocks[i].UniformBufferSize,
+ prog->BufferInterfaceBlocks[i].Name,
+ prog->BufferInterfaceBlocks[i].UniformBufferSize,
ctx->Const.MaxUniformBlockSize);
}
- if (prog->UniformBlocks[i].IsShaderStorage &&
- prog->UniformBlocks[i].UniformBufferSize > ctx->Const.MaxShaderStorageBlockSize) {
+ if (prog->BufferInterfaceBlocks[i].IsShaderStorage &&
+ prog->BufferInterfaceBlocks[i].UniformBufferSize > ctx->Const.MaxShaderStorageBlockSize) {
linker_error(prog, "Shader storage block %s too big (%d/%d)\n",
- prog->UniformBlocks[i].Name,
- prog->UniformBlocks[i].UniformBufferSize,
+ prog->BufferInterfaceBlocks[i].Name,
+ prog->BufferInterfaceBlocks[i].UniformBufferSize,
ctx->Const.MaxShaderStorageBlockSize);
}
@@ -2824,7 +2826,7 @@ check_resources(struct gl_context *ctx, struct gl_shader_program *prog)
if (prog->UniformBlockStageIndex[j][i] != -1) {
struct gl_shader *sh = prog->_LinkedShaders[j];
int stage_index = prog->UniformBlockStageIndex[j][i];
- if (sh && sh->UniformBlocks[stage_index].IsShaderStorage) {
+ if (sh && sh->BufferInterfaceBlocks[stage_index].IsShaderStorage) {
shader_blocks[j]++;
total_shader_storage_blocks++;
} else {
@@ -2941,7 +2943,7 @@ check_image_resources(struct gl_context *ctx, struct gl_shader_program *prog)
for (unsigned j = 0; j < prog->NumBufferInterfaceBlocks; j++) {
int stage_index = prog->UniformBlockStageIndex[i][j];
- if (stage_index != -1 && sh->UniformBlocks[stage_index].IsShaderStorage)
+ if (stage_index != -1 && sh->BufferInterfaceBlocks[stage_index].IsShaderStorage)
total_shader_storage_blocks++;
}
@@ -3147,7 +3149,7 @@ should_add_buffer_variable(struct gl_shader_program *shProg,
return true;
for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
- block_name = shProg->UniformBlocks[i].Name;
+ block_name = shProg->BufferInterfaceBlocks[i].Name;
if (strncmp(block_name, name, strlen(block_name)) == 0) {
found_interface = true;
break;
@@ -3389,6 +3391,242 @@ add_packed_varyings(struct gl_shader_program *shProg, int stage)
return true;
}
+static char*
+get_top_level_name(const char *name)
+{
+ const char *first_dot = strchr(name, '.');
+ const char *first_square_bracket = strchr(name, '[');
+ int name_size = 0;
+ /* From ARB_program_interface_query spec:
+ *
+ * "For the property TOP_LEVEL_ARRAY_SIZE, a single integer identifying the
+ * number of active array elements of the top-level shader storage block
+ * member containing to the active variable is written to <params>. If the
+ * top-level block member is not declared as an array, the value one is
+ * written to <params>. If the top-level block member is an array with no
+ * declared size, the value zero is written to <params>.
+ */
+
+ /* The buffer variable is at the top level. */
+ if (!first_square_bracket && !first_dot)
+ name_size = strlen(name);
+ else if ((!first_square_bracket ||
+ (first_dot && first_dot < first_square_bracket)))
+ name_size = first_dot - name;
+ else
+ name_size = first_square_bracket - name;
+
+ return strndup(name, name_size);
+}
+
+static char*
+get_var_name(const char *name)
+{
+ const char *first_dot = strchr(name, '.');
+
+ if (!first_dot)
+ return strdup(name);
+
+ return strndup(first_dot+1, strlen(first_dot) - 1);
+}
+
+static bool
+is_top_level_shader_storage_block_member(const char* name,
+ const char* interface_name,
+ const char* field_name)
+{
+ bool result = false;
+
+ /* If the given variable is already a top-level shader storage
+ * block member, then return array_size = 1.
+ * We could have two possibilities: if we have an instanced
+ * shader storage block or not instanced.
+ *
+ * For the first, we create a name as it would appear at the top level and
+ * compare it with the real name. If they are the same, then
+ * the variable is already at top-level.
+ *
+ * Full instanced name is: interface name + '.' + var name +
+ * NULL character
+ */
+ int name_length = strlen(interface_name) + 1 + strlen(field_name) + 1;
+ char *full_instanced_name = (char *) calloc(name_length, sizeof(char));
+ if (!full_instanced_name) {
+ fprintf(stderr, "%s: Cannot allocate space for name\n", __func__);
+ return false;
+ }
+
+ snprintf(full_instanced_name, name_length, "%s.%s",
+ interface_name, field_name);
+
+ /* Check if it's a top-level shader storage block member of an
+ * instanced interface block, or of an unnamed interface block.
+ */
+ if (strcmp(name, full_instanced_name) == 0 ||
+ strcmp(name, field_name) == 0)
+ result = true;
+
+ free(full_instanced_name);
+ return result;
+}
+
+static void
+calculate_array_size(struct gl_shader_program *shProg,
+ struct gl_uniform_storage *uni)
+{
+ int block_index = uni->block_index;
+ int array_size = -1;
+ char *var_name = get_top_level_name(uni->name);
+ char *interface_name =
+ get_top_level_name(shProg->BufferInterfaceBlocks[block_index].Name);
+
+ if (strcmp(var_name, interface_name) == 0) {
+ /* Deal with instanced array of SSBOs */
+ char *temp_name = get_var_name(uni->name);
+ free(var_name);
+ var_name = get_top_level_name(temp_name);
+ free(temp_name);
+ }
+
+ for (unsigned i = 0; i < shProg->NumShaders; i++) {
+ if (shProg->Shaders[i] == NULL)
+ continue;
+
+ const gl_shader *stage = shProg->Shaders[i];
+ foreach_in_list(ir_instruction, node, stage->ir) {
+ ir_variable *var = node->as_variable();
+ if (!var || !var->get_interface_type() ||
+ var->data.mode != ir_var_shader_storage)
+ continue;
+
+ const glsl_type *interface = var->get_interface_type();
+
+ if (strcmp(interface_name, interface->name) != 0)
+ continue;
+
+ for (unsigned i = 0; i < interface->length; i++) {
+ const glsl_struct_field *field = &interface->fields.structure[i];
+ if (strcmp(field->name, var_name) != 0)
+ continue;
+ /* From GL_ARB_program_interface_query spec:
+ *
+ * "For the property TOP_LEVEL_ARRAY_SIZE, a single integer
+ * identifying the number of active array elements of the top-level
+ * shader storage block member containing to the active variable is
+ * written to <params>. If the top-level block member is not
+ * declared as an array, the value one is written to <params>. If
+ * the top-level block member is an array with no declared size,
+ * the value zero is written to <params>.
+ */
+ if (is_top_level_shader_storage_block_member(uni->name,
+ interface_name,
+ var_name))
+ array_size = 1;
+ else if (field->type->is_unsized_array())
+ array_size = 0;
+ else if (field->type->is_array())
+ array_size = field->type->length;
+ else
+ array_size = 1;
+
+ goto found_top_level_array_size;
+ }
+ }
+ }
+found_top_level_array_size:
+ free(interface_name);
+ free(var_name);
+ uni->top_level_array_size = array_size;
+}
+
+static void
+calculate_array_stride(struct gl_shader_program *shProg,
+ struct gl_uniform_storage *uni)
+{
+ int block_index = uni->block_index;
+ int array_stride = -1;
+ char *var_name = get_top_level_name(uni->name);
+ char *interface_name =
+ get_top_level_name(shProg->BufferInterfaceBlocks[block_index].Name);
+
+ if (strcmp(var_name, interface_name) == 0) {
+ /* Deal with instanced array of SSBOs */
+ char *temp_name = get_var_name(uni->name);
+ free(var_name);
+ var_name = get_top_level_name(temp_name);
+ free(temp_name);
+ }
+
+ for (unsigned i = 0; i < shProg->NumShaders; i++) {
+ if (shProg->Shaders[i] == NULL)
+ continue;
+
+ const gl_shader *stage = shProg->Shaders[i];
+ foreach_in_list(ir_instruction, node, stage->ir) {
+ ir_variable *var = node->as_variable();
+ if (!var || !var->get_interface_type() ||
+ var->data.mode != ir_var_shader_storage)
+ continue;
+
+ const glsl_type *interface = var->get_interface_type();
+
+ if (strcmp(interface_name, interface->name) != 0) {
+ continue;
+ }
+
+ for (unsigned i = 0; i < interface->length; i++) {
+ const glsl_struct_field *field = &interface->fields.structure[i];
+ if (strcmp(field->name, var_name) != 0)
+ continue;
+ /* From GL_ARB_program_interface_query:
+ *
+ * "For the property TOP_LEVEL_ARRAY_STRIDE, a single integer
+ * identifying the stride between array elements of the top-level
+ * shader storage block member containing the active variable is
+ * written to <params>. For top-level block members declared as
+ * arrays, the value written is the difference, in basic machine
+ * units, between the offsets of the active variable for
+ * consecutive elements in the top-level array. For top-level
+ * block members not declared as an array, zero is written to
+ * <params>."
+ */
+ if (field->type->is_array()) {
+ const enum glsl_matrix_layout matrix_layout =
+ glsl_matrix_layout(field->matrix_layout);
+ bool row_major = matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR;
+ const glsl_type *array_type = field->type->fields.array;
+
+ if (is_top_level_shader_storage_block_member(uni->name,
+ interface_name,
+ var_name)) {
+ array_stride = 0;
+ goto found_top_level_array_stride;
+ }
+ if (interface->interface_packing != GLSL_INTERFACE_PACKING_STD430) {
+ if (array_type->is_record() || array_type->is_array()) {
+ array_stride = array_type->std140_size(row_major);
+ array_stride = glsl_align(array_stride, 16);
+ } else {
+ unsigned element_base_align = 0;
+ element_base_align = array_type->std140_base_alignment(row_major);
+ array_stride = MAX2(element_base_align, 16);
+ }
+ } else {
+ array_stride = array_type->std430_array_stride(row_major);
+ }
+ } else {
+ array_stride = 0;
+ }
+ goto found_top_level_array_stride;
+ }
+ }
+ }
+found_top_level_array_stride:
+ free(interface_name);
+ free(var_name);
+ uni->top_level_array_stride = array_stride;
+}
+
/**
* Builds up a list of program resources that point to existing
* resource data.
@@ -3473,6 +3711,11 @@ build_program_resource_list(struct gl_shader_program *shProg)
shProg->UniformStorage[i].name))
continue;
+ if (is_shader_storage) {
+ calculate_array_size(shProg, &shProg->UniformStorage[i]);
+ calculate_array_stride(shProg, &shProg->UniformStorage[i]);
+ }
+
if (!add_program_resource(shProg, type,
&shProg->UniformStorage[i], stageref))
return;
@@ -3480,10 +3723,10 @@ build_program_resource_list(struct gl_shader_program *shProg)
/* Add program uniform blocks and shader storage blocks. */
for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
- bool is_shader_storage = shProg->UniformBlocks[i].IsShaderStorage;
+ bool is_shader_storage = shProg->BufferInterfaceBlocks[i].IsShaderStorage;
GLenum type = is_shader_storage ? GL_SHADER_STORAGE_BLOCK : GL_UNIFORM_BLOCK;
if (!add_program_resource(shProg, type,
- &shProg->UniformBlocks[i], 0))
+ &shProg->BufferInterfaceBlocks[i], 0))
return;
}
@@ -3599,6 +3842,42 @@ link_assign_subroutine_types(struct gl_shader_program *prog)
}
}
+static void
+split_ubos_and_ssbos(void *mem_ctx,
+ struct gl_uniform_block *blocks,
+ unsigned num_blocks,
+ struct gl_uniform_block ***ubos,
+ unsigned *num_ubos,
+ struct gl_uniform_block ***ssbos,
+ unsigned *num_ssbos)
+{
+ unsigned num_ubo_blocks = 0;
+ unsigned num_ssbo_blocks = 0;
+
+ for (unsigned i = 0; i < num_blocks; i++) {
+ if (blocks[i].IsShaderStorage)
+ num_ssbo_blocks++;
+ else
+ num_ubo_blocks++;
+ }
+
+ *ubos = ralloc_array(mem_ctx, gl_uniform_block *, num_ubo_blocks);
+ *num_ubos = 0;
+
+ *ssbos = ralloc_array(mem_ctx, gl_uniform_block *, num_ssbo_blocks);
+ *num_ssbos = 0;
+
+ for (unsigned i = 0; i < num_blocks; i++) {
+ if (blocks[i].IsShaderStorage) {
+ (*ssbos)[(*num_ssbos)++] = &blocks[i];
+ } else {
+ (*ubos)[(*num_ubos)++] = &blocks[i];
+ }
+ }
+
+ assert(*num_ubos + *num_ssbos == num_blocks);
+}
+
void
link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
{
@@ -4110,6 +4389,31 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
}
}
+ /* Split BufferInterfaceBlocks into UniformBlocks and ShaderStorageBlocks
+ * for gl_shader_program and gl_shader, so that drivers that need separate
+ * index spaces for each set can have that.
+ */
+ for (unsigned i = MESA_SHADER_VERTEX; i < MESA_SHADER_STAGES; i++) {
+ if (prog->_LinkedShaders[i] != NULL) {
+ gl_shader *sh = prog->_LinkedShaders[i];
+ split_ubos_and_ssbos(sh,
+ sh->BufferInterfaceBlocks,
+ sh->NumBufferInterfaceBlocks,
+ &sh->UniformBlocks,
+ &sh->NumUniformBlocks,
+ &sh->ShaderStorageBlocks,
+ &sh->NumShaderStorageBlocks);
+ }
+ }
+
+ split_ubos_and_ssbos(prog,
+ prog->BufferInterfaceBlocks,
+ prog->NumBufferInterfaceBlocks,
+ &prog->UniformBlocks,
+ &prog->NumUniformBlocks,
+ &prog->ShaderStorageBlocks,
+ &prog->NumShaderStorageBlocks);
+
/* FINISHME: Assign fragment shader output locations. */
done:
diff --git a/src/glsl/lower_named_interface_blocks.cpp b/src/glsl/lower_named_interface_blocks.cpp
index 01bbdd0587e..276a2dedf47 100644
--- a/src/glsl/lower_named_interface_blocks.cpp
+++ b/src/glsl/lower_named_interface_blocks.cpp
@@ -65,6 +65,39 @@
#include "ir_rvalue_visitor.h"
#include "program/hash_table.h"
+static const glsl_type *
+process_array_type(const glsl_type *type, unsigned idx)
+{
+ const glsl_type *element_type = type->fields.array;
+ if (element_type->is_array()) {
+ const glsl_type *new_array_type = process_array_type(element_type, idx);
+ return glsl_type::get_array_instance(new_array_type, type->length);
+ } else {
+ return glsl_type::get_array_instance(
+ element_type->fields.structure[idx].type, type->length);
+ }
+}
+
+static ir_rvalue *
+process_array_ir(void * const mem_ctx,
+ ir_dereference_array *deref_array_prev,
+ ir_rvalue *deref_var)
+{
+ ir_dereference_array *deref_array =
+ deref_array_prev->array->as_dereference_array();
+
+ if (deref_array == NULL) {
+ return new(mem_ctx) ir_dereference_array(deref_var,
+ deref_array_prev->array_index);
+ } else {
+ deref_array = (ir_dereference_array *) process_array_ir(mem_ctx,
+ deref_array,
+ deref_var);
+ return new(mem_ctx) ir_dereference_array(deref_array,
+ deref_array_prev->array_index);
+ }
+}
+
namespace {
class flatten_named_interface_blocks_declarations : public ir_rvalue_visitor
@@ -112,15 +145,9 @@ flatten_named_interface_blocks_declarations::run(exec_list *instructions)
var->data.mode == ir_var_shader_storage)
continue;
- const glsl_type * iface_t = var->type;
- const glsl_type * array_t = NULL;
+ const glsl_type * iface_t = var->type->without_array();
exec_node *insert_pos = var;
- if (iface_t->is_array()) {
- array_t = iface_t;
- iface_t = array_t->fields.array;
- }
-
assert (iface_t->is_interface());
for (unsigned i = 0; i < iface_t->length; i++) {
@@ -137,7 +164,7 @@ flatten_named_interface_blocks_declarations::run(exec_list *instructions)
ir_variable *new_var;
char *var_name =
ralloc_strdup(mem_ctx, iface_t->fields.structure[i].name);
- if (array_t == NULL) {
+ if (!var->type->is_array()) {
new_var =
new(mem_ctx) ir_variable(iface_t->fields.structure[i].type,
var_name,
@@ -145,9 +172,7 @@ flatten_named_interface_blocks_declarations::run(exec_list *instructions)
new_var->data.from_named_ifc_block_nonarray = 1;
} else {
const glsl_type *new_array_type =
- glsl_type::get_array_instance(
- iface_t->fields.structure[i].type,
- array_t->length);
+ process_array_type(var->type, i);
new_var =
new(mem_ctx) ir_variable(new_array_type,
var_name,
@@ -236,9 +261,8 @@ flatten_named_interface_blocks_declarations::handle_rvalue(ir_rvalue **rvalue)
ir_dereference_array *deref_array =
ir->record->as_dereference_array();
if (deref_array != NULL) {
- *rvalue =
- new(mem_ctx) ir_dereference_array(deref_var,
- deref_array->array_index);
+ *rvalue = process_array_ir(mem_ctx, deref_array,
+ (ir_rvalue *)deref_var);
} else {
*rvalue = deref_var;
}
diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp
index 247620e6148..e818c048461 100644
--- a/src/glsl/lower_ubo_reference.cpp
+++ b/src/glsl/lower_ubo_reference.cpp
@@ -203,55 +203,114 @@ static const char *
interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d,
ir_rvalue **nonconst_block_index)
{
- ir_rvalue *previous_index = NULL;
*nonconst_block_index = NULL;
+ char *name_copy = NULL;
+ size_t base_length = 0;
+
+ /* Loop back through the IR until we find the uniform block */
+ ir_rvalue *ir = d;
+ while (ir != NULL) {
+ switch (ir->ir_type) {
+ case ir_type_dereference_variable: {
+ /* Exit loop */
+ ir = NULL;
+ break;
+ }
+
+ case ir_type_dereference_record: {
+ ir_dereference_record *r = (ir_dereference_record *) ir;
+ ir = r->record->as_dereference();
+
+ /* If we got here it means any previous array subscripts belong to
+ * block members and not the block itself so skip over them in the
+ * next pass.
+ */
+ d = ir;
+ break;
+ }
+
+ case ir_type_dereference_array: {
+ ir_dereference_array *a = (ir_dereference_array *) ir;
+ ir = a->array->as_dereference();
+ break;
+ }
+
+ case ir_type_swizzle: {
+ ir_swizzle *s = (ir_swizzle *) ir;
+ ir = s->val->as_dereference();
+ break;
+ }
+
+ default:
+ assert(!"Should not get here.");
+ break;
+ }
+ }
while (d != NULL) {
switch (d->ir_type) {
case ir_type_dereference_variable: {
ir_dereference_variable *v = (ir_dereference_variable *) d;
- if (previous_index
- && v->var->is_interface_instance()
- && v->var->type->is_array()) {
-
- ir_constant *const_index = previous_index->as_constant();
- if (!const_index) {
- *nonconst_block_index = previous_index;
- return ralloc_asprintf(mem_ctx, "%s[0]", base_name);
- } else {
- return ralloc_asprintf(mem_ctx,
- "%s[%d]",
- base_name,
- const_index->get_uint_component(0));
- }
+ if (name_copy != NULL &&
+ v->var->is_interface_instance() &&
+ v->var->type->is_array()) {
+ return name_copy;
} else {
+ *nonconst_block_index = NULL;
return base_name;
}
break;
}
- case ir_type_dereference_record: {
- ir_dereference_record *r = (ir_dereference_record *) d;
-
- d = r->record->as_dereference();
- break;
- }
-
case ir_type_dereference_array: {
ir_dereference_array *a = (ir_dereference_array *) d;
+ size_t new_length;
+
+ if (name_copy == NULL) {
+ name_copy = ralloc_strdup(mem_ctx, base_name);
+ base_length = strlen(name_copy);
+ }
+
+ /* For arrays of arrays we start at the innermost array and work our
+ * way out so we need to insert the subscript at the base of the
+ * name string rather than just attaching it to the end.
+ */
+ new_length = base_length;
+ ir_constant *const_index = a->array_index->as_constant();
+ char *end = ralloc_strdup(NULL, &name_copy[new_length]);
+ if (!const_index) {
+ ir_rvalue *array_index = a->array_index;
+ if (array_index->type != glsl_type::uint_type)
+ array_index = i2u(array_index);
+
+ if (a->array->type->is_array() &&
+ a->array->type->fields.array->is_array()) {
+ ir_constant *base_size = new(mem_ctx)
+ ir_constant(a->array->type->fields.array->arrays_of_arrays_size());
+ array_index = mul(array_index, base_size);
+ }
+
+ if (*nonconst_block_index) {
+ *nonconst_block_index = add(*nonconst_block_index, array_index);
+ } else {
+ *nonconst_block_index = array_index;
+ }
+
+ ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[0]%s",
+ end);
+ } else {
+ ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[%d]%s",
+ const_index->get_uint_component(0),
+ end);
+ }
+ ralloc_free(end);
d = a->array->as_dereference();
- previous_index = a->array_index;
break;
}
- case ir_type_swizzle: {
- ir_swizzle *s = (ir_swizzle *) d;
- d = s->val->as_dereference();
- break;
- }
default:
assert(!"Should not get here.");
break;
@@ -277,27 +336,31 @@ lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
interface_field_name(mem_ctx, (char *) var->get_interface_type()->name,
deref, &nonconst_block_index);
- /* Locate the ubo block by interface name */
+ /* Locate the block by interface name */
+ this->is_shader_storage = var->is_in_shader_storage_block();
+ unsigned num_blocks;
+ struct gl_uniform_block **blocks;
+ if (this->is_shader_storage) {
+ num_blocks = shader->NumShaderStorageBlocks;
+ blocks = shader->ShaderStorageBlocks;
+ } else {
+ num_blocks = shader->NumUniformBlocks;
+ blocks = shader->UniformBlocks;
+ }
this->uniform_block = NULL;
- for (unsigned i = 0; i < shader->NumUniformBlocks; i++) {
- if (strcmp(field_name, shader->UniformBlocks[i].Name) == 0) {
+ for (unsigned i = 0; i < num_blocks; i++) {
+ if (strcmp(field_name, blocks[i]->Name) == 0) {
ir_constant *index = new(mem_ctx) ir_constant(i);
if (nonconst_block_index) {
- if (nonconst_block_index->type != glsl_type::uint_type)
- nonconst_block_index = i2u(nonconst_block_index);
this->uniform_block = add(nonconst_block_index, index);
} else {
this->uniform_block = index;
}
- this->is_shader_storage = shader->UniformBlocks[i].IsShaderStorage;
-
- struct gl_uniform_block *block = &shader->UniformBlocks[i];
-
this->ubo_var = var->is_interface_instance()
- ? &block->Uniforms[0] : &block->Uniforms[var->data.location];
+ ? &blocks[i]->Uniforms[0] : &blocks[i]->Uniforms[var->data.location];
break;
}
@@ -335,7 +398,7 @@ lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
if (deref_array->array->type->is_double())
array_stride *= 2;
*matrix_columns = deref_array->array->type->matrix_columns;
- } else if (deref_array->type->is_interface()) {
+ } else if (deref_array->type->without_array()->is_interface()) {
/* We're processing an array dereference of an interface instance
* array. The thing being dereferenced *must* be a variable
* dereference because interfaces cannot be embedded in other
@@ -344,7 +407,6 @@ lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
* interface instance array will have the same offsets relative to
* the base of the block that backs them.
*/
- assert(deref_array->array->as_dereference_variable());
deref = deref_array->array->as_dereference();
break;
} else {
@@ -744,7 +806,31 @@ lower_ubo_reference_visitor::emit_access(bool is_write,
* or 32 depending on the number of columns.
*/
assert(matrix_columns <= 4);
- unsigned matrix_stride = glsl_align(matrix_columns * N, 16);
+ unsigned matrix_stride = 0;
+ /* Matrix stride for std430 mat2xY matrices are not rounded up to
+ * vec4 size. From OpenGL 4.3 spec, section 7.6.2.2 "Standard Uniform
+ * Block Layout":
+ *
+ * "2. If the member is a two- or four-component vector with components
+ * consuming N basic machine units, the base alignment is 2N or 4N,
+ * respectively." [...]
+ * "4. If the member is an array of scalars or vectors, the base alignment
+ * and array stride are set to match the base alignment of a single array
+ * element, according to rules (1), (2), and (3), and rounded up to the
+ * base alignment of a vec4." [...]
+ * "7. If the member is a row-major matrix with C columns and R rows, the
+ * matrix is stored identically to an array of R row vectors with C
+ * components each, according to rule (4)." [...]
+ * "When using the std430 storage layout, shader storage blocks will be
+ * laid out in buffer storage identically to uniform and shader storage
+ * blocks using the std140 layout, except that the base alignment and
+ * stride of arrays of scalars and vectors in rule 4 and of structures in
+ * rule 9 are not rounded up a multiple of the base alignment of a vec4."
+ */
+ if (packing == GLSL_INTERFACE_PACKING_STD430 && matrix_columns == 2)
+ matrix_stride = 2 * N;
+ else
+ matrix_stride = glsl_align(matrix_columns * N, 16);
const glsl_type *deref_type = deref->type->base_type == GLSL_TYPE_FLOAT ?
glsl_type::float_type : glsl_type::double_type;
diff --git a/src/glsl/lower_vec_index_to_cond_assign.cpp b/src/glsl/lower_vec_index_to_cond_assign.cpp
index 0c3394a504b..b6238825f8a 100644
--- a/src/glsl/lower_vec_index_to_cond_assign.cpp
+++ b/src/glsl/lower_vec_index_to_cond_assign.cpp
@@ -88,7 +88,9 @@ ir_vec_index_to_cond_assign_visitor::convert_vec_index_to_cond_assign(void *mem_
exec_list list;
/* Store the index to a temporary to avoid reusing its tree. */
- index = new(base_ir) ir_variable(glsl_type::int_type,
+ assert(orig_index->type == glsl_type::int_type ||
+ orig_index->type == glsl_type::uint_type);
+ index = new(base_ir) ir_variable(orig_index->type,
"vec_index_tmp_i",
ir_var_temporary);
list.push_tail(index);
diff --git a/src/glsl/lower_vector_insert.cpp b/src/glsl/lower_vector_insert.cpp
index 6d7cfa94262..26d31b03c12 100644
--- a/src/glsl/lower_vector_insert.cpp
+++ b/src/glsl/lower_vector_insert.cpp
@@ -108,9 +108,13 @@ vector_insert_visitor::handle_rvalue(ir_rvalue **rv)
factory.emit(assign(temp, expr->operands[0]));
factory.emit(assign(src_temp, expr->operands[1]));
+ assert(expr->operands[2]->type == glsl_type::int_type ||
+ expr->operands[2]->type == glsl_type::uint_type);
+
for (unsigned i = 0; i < expr->type->vector_elements; i++) {
ir_constant *const cmp_index =
- new(factory.mem_ctx) ir_constant(int(i));
+ ir_constant::zero(factory.mem_ctx, expr->operands[2]->type);
+ cmp_index->value.u[0] = i;
ir_variable *const cmp_result =
factory.make_temp(glsl_type::bool_type, "index_condition");
diff --git a/src/glsl/builtin_type_macros.h b/src/glsl/nir/builtin_type_macros.h
index 8e16ae45489..8e16ae45489 100644
--- a/src/glsl/builtin_type_macros.h
+++ b/src/glsl/nir/builtin_type_macros.h
diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index 6bedb4eb8e6..e57e834d948 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -152,11 +152,13 @@ glsl_to_nir(const struct gl_shader_program *shader_prog,
if (sh->Program->SamplersUsed & (1 << i))
num_textures = i;
- shader->info.name = ralloc_asprintf(shader, "GLSL%d", sh->Name);
+ shader->info.name = ralloc_asprintf(shader, "GLSL%d", shader_prog->Name);
+ if (shader_prog->Label)
+ shader->info.label = ralloc_strdup(shader, shader_prog->Label);
shader->info.num_textures = num_textures;
shader->info.num_ubos = sh->NumUniformBlocks;
shader->info.num_abos = shader_prog->NumAtomicBuffers;
- shader->info.num_ssbos = shader_prog->NumBufferInterfaceBlocks;
+ shader->info.num_ssbos = sh->NumShaderStorageBlocks;
shader->info.num_images = sh->NumImages;
shader->info.inputs_read = sh->Program->InputsRead;
shader->info.outputs_written = sh->Program->OutputsWritten;
@@ -164,11 +166,37 @@ glsl_to_nir(const struct gl_shader_program *shader_prog,
shader->info.uses_texture_gather = sh->Program->UsesGather;
shader->info.uses_clip_distance_out = sh->Program->UsesClipDistanceOut;
shader->info.separate_shader = shader_prog->SeparateShader;
- shader->info.gs.vertices_out = sh->Geom.VerticesOut;
- shader->info.gs.invocations = sh->Geom.Invocations;
shader->info.has_transform_feedback_varyings =
shader_prog->TransformFeedback.NumVarying > 0;
+ switch (stage) {
+ case MESA_SHADER_GEOMETRY:
+ shader->info.gs.vertices_out = sh->Geom.VerticesOut;
+ shader->info.gs.invocations = sh->Geom.Invocations;
+ break;
+
+ case MESA_SHADER_FRAGMENT: {
+ struct gl_fragment_program *fp =
+ (struct gl_fragment_program *)sh->Program;
+
+ shader->info.fs.uses_discard = fp->UsesKill;
+ shader->info.fs.early_fragment_tests = sh->EarlyFragmentTests;
+ shader->info.fs.depth_layout = fp->FragDepthLayout;
+ break;
+ }
+
+ case MESA_SHADER_COMPUTE: {
+ struct gl_compute_program *cp = (struct gl_compute_program *)sh->Program;
+ shader->info.cs.local_size[0] = cp->LocalSize[0];
+ shader->info.cs.local_size[1] = cp->LocalSize[1];
+ shader->info.cs.local_size[2] = cp->LocalSize[2];
+ break;
+ }
+
+ default:
+ break; /* No stage-specific info */
+ }
+
return shader;
}
@@ -393,35 +421,10 @@ nir_visitor::visit(ir_variable *ir)
var->interface_type = ir->get_interface_type();
- switch (var->data.mode) {
- case nir_var_local:
- exec_list_push_tail(&impl->locals, &var->node);
- break;
-
- case nir_var_global:
- exec_list_push_tail(&shader->globals, &var->node);
- break;
-
- case nir_var_shader_in:
- exec_list_push_tail(&shader->inputs, &var->node);
- break;
-
- case nir_var_shader_out:
- exec_list_push_tail(&shader->outputs, &var->node);
- break;
-
- case nir_var_uniform:
- case nir_var_shader_storage:
- exec_list_push_tail(&shader->uniforms, &var->node);
- break;
-
- case nir_var_system_value:
- exec_list_push_tail(&shader->system_values, &var->node);
- break;
-
- default:
- unreachable("not reached");
- }
+ if (var->data.mode == nir_var_local)
+ nir_function_impl_add_variable(impl, var);
+ else
+ nir_shader_add_variable(shader, var);
_mesa_hash_table_insert(var_table, ir, var);
this->var = var;
@@ -695,9 +698,21 @@ nir_visitor::visit(ir_call *ir)
} else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_xor_internal") == 0) {
op = nir_intrinsic_ssbo_atomic_xor;
} else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_min_internal") == 0) {
- op = nir_intrinsic_ssbo_atomic_min;
+ assert(ir->return_deref);
+ if (ir->return_deref->type == glsl_type::int_type)
+ op = nir_intrinsic_ssbo_atomic_imin;
+ else if (ir->return_deref->type == glsl_type::uint_type)
+ op = nir_intrinsic_ssbo_atomic_umin;
+ else
+ unreachable("Invalid type");
} else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_max_internal") == 0) {
- op = nir_intrinsic_ssbo_atomic_max;
+ assert(ir->return_deref);
+ if (ir->return_deref->type == glsl_type::int_type)
+ op = nir_intrinsic_ssbo_atomic_imax;
+ else if (ir->return_deref->type == glsl_type::uint_type)
+ op = nir_intrinsic_ssbo_atomic_umax;
+ else
+ unreachable("Invalid type");
} else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_exchange_internal") == 0) {
op = nir_intrinsic_ssbo_atomic_exchange;
} else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_comp_swap_internal") == 0) {
@@ -906,8 +921,10 @@ nir_visitor::visit(ir_call *ir)
break;
}
case nir_intrinsic_ssbo_atomic_add:
- case nir_intrinsic_ssbo_atomic_min:
- case nir_intrinsic_ssbo_atomic_max:
+ case nir_intrinsic_ssbo_atomic_imin:
+ case nir_intrinsic_ssbo_atomic_umin:
+ case nir_intrinsic_ssbo_atomic_imax:
+ case nir_intrinsic_ssbo_atomic_umax:
case nir_intrinsic_ssbo_atomic_and:
case nir_intrinsic_ssbo_atomic_or:
case nir_intrinsic_ssbo_atomic_xor:
@@ -2065,13 +2082,10 @@ nir_visitor::visit(ir_constant *ir)
* constant initializer and return a dereference.
*/
- nir_variable *var = ralloc(this->shader, nir_variable);
- var->name = ralloc_strdup(var, "const_temp");
- var->type = ir->type;
- var->data.mode = nir_var_local;
+ nir_variable *var =
+ nir_local_variable_create(this->impl, ir->type, "const_temp");
var->data.read_only = true;
var->constant_initializer = constant_copy(ir, var);
- exec_list_push_tail(&this->impl->locals, &var->node);
this->deref_head = nir_deref_var_create(this->shader, var);
this->deref_tail = &this->deref_head->deref;
diff --git a/src/glsl/glsl_types.cpp b/src/glsl/nir/glsl_types.cpp
index 9ef2fbf2525..309f9dca61e 100644
--- a/src/glsl/glsl_types.cpp
+++ b/src/glsl/nir/glsl_types.cpp
@@ -1175,7 +1175,22 @@ glsl_type::record_location_offset(unsigned length) const
const glsl_type *wa = st->without_array();
if (wa->is_record()) {
unsigned r_offset = wa->record_location_offset(wa->length);
- offset += st->is_array() ? st->length * r_offset : r_offset;
+ offset += st->is_array() ?
+ st->arrays_of_arrays_size() * r_offset : r_offset;
+ } else if (st->is_array() && st->fields.array->is_array()) {
+ unsigned outer_array_size = st->length;
+ const glsl_type *base_type = st->fields.array;
+
+ /* For arrays of arrays the outer arrays take up a uniform
+ * slot for each element. The innermost array elements share a
+ * single slot so we ignore the innermost array when calculating
+ * the offset.
+ */
+ while (base_type->fields.array->is_array()) {
+ outer_array_size = outer_array_size * base_type->length;
+ base_type = base_type->fields.array;
+ }
+ offset += outer_array_size;
} else {
/* We dont worry about arrays here because unless the array
* contains a structure or another array it only takes up a single
@@ -1419,8 +1434,8 @@ glsl_type::std140_size(bool row_major) const
unsigned int array_len;
if (this->is_array()) {
- element_type = this->fields.array;
- array_len = this->length;
+ element_type = this->without_array();
+ array_len = this->arrays_of_arrays_size();
} else {
element_type = this;
array_len = 1;
@@ -1453,12 +1468,13 @@ glsl_type::std140_size(bool row_major) const
* the array are laid out in order, according to rule (9).
*/
if (this->is_array()) {
- if (this->fields.array->is_record()) {
- return this->length * this->fields.array->std140_size(row_major);
+ if (this->without_array()->is_record()) {
+ return this->arrays_of_arrays_size() *
+ this->without_array()->std140_size(row_major);
} else {
- unsigned element_base_align =
- this->fields.array->std140_base_alignment(row_major);
- return this->length * MAX2(element_base_align, 16);
+ unsigned element_base_align =
+ this->without_array()->std140_base_alignment(row_major);
+ return this->arrays_of_arrays_size() * MAX2(element_base_align, 16);
}
}
@@ -1818,3 +1834,17 @@ glsl_type::coordinate_components() const
return size;
}
+
+/**
+ * Declarations of type flyweights (glsl_type::_foo_type) and
+ * convenience pointers (glsl_type::foo_type).
+ * @{
+ */
+#define DECL_TYPE(NAME, ...) \
+ const glsl_type glsl_type::_##NAME##_type = glsl_type(__VA_ARGS__, #NAME); \
+ const glsl_type *const glsl_type::NAME##_type = &glsl_type::_##NAME##_type;
+
+#define STRUCT_TYPE(NAME)
+
+#include "builtin_type_macros.h"
+/** @} */
diff --git a/src/glsl/glsl_types.h b/src/glsl/nir/glsl_types.h
index b83e1ca3d2c..b83e1ca3d2c 100644
--- a/src/glsl/glsl_types.h
+++ b/src/glsl/nir/glsl_types.h
diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index e12da805281..793bdafb54b 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -103,6 +103,72 @@ nir_reg_remove(nir_register *reg)
exec_node_remove(&reg->node);
}
+void
+nir_shader_add_variable(nir_shader *shader, nir_variable *var)
+{
+ switch (var->data.mode) {
+ case nir_var_local:
+ assert(!"nir_shader_add_variable cannot be used for local variables");
+ break;
+
+ case nir_var_global:
+ exec_list_push_tail(&shader->globals, &var->node);
+ break;
+
+ case nir_var_shader_in:
+ exec_list_push_tail(&shader->inputs, &var->node);
+ break;
+
+ case nir_var_shader_out:
+ exec_list_push_tail(&shader->outputs, &var->node);
+ break;
+
+ case nir_var_uniform:
+ case nir_var_shader_storage:
+ exec_list_push_tail(&shader->uniforms, &var->node);
+ break;
+
+ case nir_var_system_value:
+ exec_list_push_tail(&shader->system_values, &var->node);
+ break;
+ }
+}
+
+nir_variable *
+nir_variable_create(nir_shader *shader, nir_variable_mode mode,
+ const struct glsl_type *type, const char *name)
+{
+ nir_variable *var = rzalloc(shader, nir_variable);
+ var->name = ralloc_strdup(var, name);
+ var->type = type;
+ var->data.mode = mode;
+
+ if ((mode == nir_var_shader_in && shader->stage != MESA_SHADER_VERTEX) ||
+ (mode == nir_var_shader_out && shader->stage != MESA_SHADER_FRAGMENT))
+ var->data.interpolation = INTERP_QUALIFIER_SMOOTH;
+
+ if (mode == nir_var_shader_in || mode == nir_var_uniform)
+ var->data.read_only = true;
+
+ nir_shader_add_variable(shader, var);
+
+ return var;
+}
+
+nir_variable *
+nir_local_variable_create(nir_function_impl *impl,
+ const struct glsl_type *type, const char *name)
+{
+ nir_variable *var = rzalloc(impl->overload->function->shader, nir_variable);
+ var->name = ralloc_strdup(var, name);
+ var->type = type;
+ var->data.mode = nir_var_local;
+
+ nir_function_impl_add_variable(impl, var);
+
+ return var;
+}
+
nir_function *
nir_function_create(nir_shader *shader, const char *name)
{
@@ -1080,31 +1146,33 @@ nir_src_as_const_value(nir_src src)
return &load->value;
}
+/**
+ * Returns true if the source is known to be dynamically uniform. Otherwise it
+ * returns false which means it may or may not be dynamically uniform but it
+ * can't be determined.
+ */
bool
-nir_srcs_equal(nir_src src1, nir_src src2)
+nir_src_is_dynamically_uniform(nir_src src)
{
- if (src1.is_ssa) {
- if (src2.is_ssa) {
- return src1.ssa == src2.ssa;
- } else {
- return false;
- }
- } else {
- if (src2.is_ssa) {
- return false;
- } else {
- if ((src1.reg.indirect == NULL) != (src2.reg.indirect == NULL))
- return false;
+ if (!src.is_ssa)
+ return false;
- if (src1.reg.indirect) {
- if (!nir_srcs_equal(*src1.reg.indirect, *src2.reg.indirect))
- return false;
- }
+ /* Constants are trivially dynamically uniform */
+ if (src.ssa->parent_instr->type == nir_instr_type_load_const)
+ return true;
- return src1.reg.reg == src2.reg.reg &&
- src1.reg.base_offset == src2.reg.base_offset;
- }
+ /* As are uniform variables */
+ if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) {
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr);
+
+ if (intr->intrinsic == nir_intrinsic_load_uniform)
+ return true;
}
+
+ /* XXX: this could have many more tests, such as when a sampler function is
+ * called with dynamically uniform arguments.
+ */
+ return false;
}
static void
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index f7b9483d74a..825c34805c4 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -35,7 +35,7 @@
#include "util/set.h"
#include "util/bitset.h"
#include "nir_types.h"
-#include "glsl/shader_enums.h"
+#include "shader_enums.h"
#include <stdio.h>
#include "nir_opcodes.h"
@@ -738,7 +738,7 @@ nir_alu_instr_channel_used(nir_alu_instr *instr, unsigned src, unsigned channel)
* used for a source
*/
static inline unsigned
-nir_ssa_alu_instr_src_components(nir_alu_instr *instr, unsigned src)
+nir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src)
{
assert(instr->dest.dest.is_ssa);
@@ -1486,6 +1486,9 @@ typedef struct nir_shader_compiler_options {
typedef struct nir_shader_info {
const char *name;
+ /* Descriptive name provided by the client; may be NULL */
+ const char *label;
+
/* Number of textures used by this shader */
unsigned num_textures;
/* Number of uniform buffers used by this shader */
@@ -1516,13 +1519,32 @@ typedef struct nir_shader_info {
/** Was this shader linked with any transform feedback varyings? */
bool has_transform_feedback_varyings;
- struct {
- /** The maximum number of vertices the geometry shader might write. */
- unsigned vertices_out;
+ union {
+ struct {
+ /** The maximum number of vertices the geometry shader might write. */
+ unsigned vertices_out;
+
+ /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */
+ unsigned invocations;
+ } gs;
+
+ struct {
+ bool uses_discard;
+
+ /**
+ * Whether early fragment tests are enabled as defined by
+ * ARB_shader_image_load_store.
+ */
+ bool early_fragment_tests;
+
+ /** gl_FragDepth layout for ARB_conservative_depth. */
+ enum gl_frag_depth_layout depth_layout;
+ } fs;
- /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */
- unsigned invocations;
- } gs;
+ struct {
+ unsigned local_size[3];
+ } cs;
+ };
} nir_shader_info;
typedef struct nir_shader {
@@ -1585,6 +1607,26 @@ nir_register *nir_local_reg_create(nir_function_impl *impl);
void nir_reg_remove(nir_register *reg);
+/** Adds a variable to the appropreate list in nir_shader */
+void nir_shader_add_variable(nir_shader *shader, nir_variable *var);
+
+static inline void
+nir_function_impl_add_variable(nir_function_impl *impl, nir_variable *var)
+{
+ assert(var->data.mode == nir_var_local);
+ exec_list_push_tail(&impl->locals, &var->node);
+}
+
+/** creates a variable, sets a few defaults, and adds it to the list */
+nir_variable *nir_variable_create(nir_shader *shader,
+ nir_variable_mode mode,
+ const struct glsl_type *type,
+ const char *name);
+/** creates a local variable and adds it to the list */
+nir_variable *nir_local_variable_create(nir_function_impl *impl,
+ const struct glsl_type *type,
+ const char *name);
+
/** creates a function and adds it to the shader's list of functions */
nir_function *nir_function_create(nir_shader *shader, const char *name);
@@ -1821,6 +1863,7 @@ bool nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state);
bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state);
nir_const_value *nir_src_as_const_value(nir_src src);
+bool nir_src_is_dynamically_uniform(nir_src src);
bool nir_srcs_equal(nir_src src1, nir_src src2);
void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src);
void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src);
diff --git a/src/glsl/nir/nir_constant_expressions.py b/src/glsl/nir/nir_constant_expressions.py
index 8fd9b1039a7..2ba8554645d 100644
--- a/src/glsl/nir/nir_constant_expressions.py
+++ b/src/glsl/nir/nir_constant_expressions.py
@@ -29,6 +29,7 @@ template = """\
#include <math.h>
#include "main/core.h"
#include "util/rounding.h" /* for _mesa_roundeven */
+#include "util/half_float.h"
#include "nir_constant_expressions.h"
#if defined(__SUNPRO_CC)
diff --git a/src/glsl/nir/nir_instr_set.c b/src/glsl/nir/nir_instr_set.c
new file mode 100644
index 00000000000..d3f939fe805
--- /dev/null
+++ b/src/glsl/nir/nir_instr_set.c
@@ -0,0 +1,519 @@
+/*
+ * Copyright © 2014 Connor Abbott
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir_instr_set.h"
+#include "nir_vla.h"
+
+#define HASH(hash, data) _mesa_fnv32_1a_accumulate((hash), (data))
+
+static uint32_t
+hash_src(uint32_t hash, const nir_src *src)
+{
+ assert(src->is_ssa);
+ hash = HASH(hash, src->ssa);
+ return hash;
+}
+
+static uint32_t
+hash_alu_src(uint32_t hash, const nir_alu_src *src, unsigned num_components)
+{
+ hash = HASH(hash, src->abs);
+ hash = HASH(hash, src->negate);
+
+ for (unsigned i = 0; i < num_components; i++)
+ hash = HASH(hash, src->swizzle[i]);
+
+ hash = hash_src(hash, &src->src);
+ return hash;
+}
+
+static uint32_t
+hash_alu(uint32_t hash, const nir_alu_instr *instr)
+{
+ hash = HASH(hash, instr->op);
+ hash = HASH(hash, instr->dest.dest.ssa.num_components);
+
+ if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
+ assert(nir_op_infos[instr->op].num_inputs == 2);
+ uint32_t hash0 = hash_alu_src(hash, &instr->src[0],
+ nir_ssa_alu_instr_src_components(instr, 0));
+ uint32_t hash1 = hash_alu_src(hash, &instr->src[1],
+ nir_ssa_alu_instr_src_components(instr, 1));
+ /* For commutative operations, we need some commutative way of
+ * combining the hashes. One option would be to XOR them but that
+ * means that anything with two identical sources will hash to 0 and
+ * that's common enough we probably don't want the guaranteed
+ * collision. Either addition or multiplication will also work.
+ */
+ hash = hash0 * hash1;
+ } else {
+ for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
+ hash = hash_alu_src(hash, &instr->src[i],
+ nir_ssa_alu_instr_src_components(instr, i));
+ }
+ }
+
+ return hash;
+}
+
+static uint32_t
+hash_load_const(uint32_t hash, const nir_load_const_instr *instr)
+{
+ hash = HASH(hash, instr->def.num_components);
+
+ hash = _mesa_fnv32_1a_accumulate_block(hash, instr->value.f,
+ instr->def.num_components
+ * sizeof(instr->value.f[0]));
+
+ return hash;
+}
+
+static int
+cmp_phi_src(const void *data1, const void *data2)
+{
+ nir_phi_src *src1 = *(nir_phi_src **)data1;
+ nir_phi_src *src2 = *(nir_phi_src **)data2;
+ return src1->pred - src2->pred;
+}
+
+static uint32_t
+hash_phi(uint32_t hash, const nir_phi_instr *instr)
+{
+ hash = HASH(hash, instr->instr.block);
+
+ /* sort sources by predecessor, since the order shouldn't matter */
+ unsigned num_preds = instr->instr.block->predecessors->entries;
+ NIR_VLA(nir_phi_src *, srcs, num_preds);
+ unsigned i = 0;
+ nir_foreach_phi_src(instr, src) {
+ srcs[i++] = src;
+ }
+
+ qsort(srcs, num_preds, sizeof(nir_phi_src *), cmp_phi_src);
+
+ for (i = 0; i < num_preds; i++) {
+ hash = hash_src(hash, &srcs[i]->src);
+ hash = HASH(hash, srcs[i]->pred);
+ }
+
+ return hash;
+}
+
+static uint32_t
+hash_intrinsic(uint32_t hash, const nir_intrinsic_instr *instr)
+{
+ const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
+ hash = HASH(hash, instr->intrinsic);
+
+ if (info->has_dest)
+ hash = HASH(hash, instr->dest.ssa.num_components);
+
+ assert(info->num_variables == 0);
+
+ hash = _mesa_fnv32_1a_accumulate_block(hash, instr->const_index,
+ info->num_indices
+ * sizeof(instr->const_index[0]));
+ return hash;
+}
+
+static uint32_t
+hash_tex(uint32_t hash, const nir_tex_instr *instr)
+{
+ hash = HASH(hash, instr->op);
+ hash = HASH(hash, instr->num_srcs);
+
+ for (unsigned i = 0; i < instr->num_srcs; i++) {
+ hash = HASH(hash, instr->src[i].src_type);
+ hash = hash_src(hash, &instr->src[i].src);
+ }
+
+ hash = HASH(hash, instr->coord_components);
+ hash = HASH(hash, instr->sampler_dim);
+ hash = HASH(hash, instr->is_array);
+ hash = HASH(hash, instr->is_shadow);
+ hash = HASH(hash, instr->is_new_style_shadow);
+ hash = HASH(hash, instr->const_offset);
+ unsigned component = instr->component;
+ hash = HASH(hash, component);
+ hash = HASH(hash, instr->sampler_index);
+ hash = HASH(hash, instr->sampler_array_size);
+
+ assert(!instr->sampler);
+
+ return hash;
+}
+
+/* Computes a hash of an instruction for use in a hash table. Note that this
+ * will only work for instructions where instr_can_rewrite() returns true, and
+ * it should return identical hashes for two instructions that are the same
+ * according nir_instrs_equal().
+ */
+
+static uint32_t
+hash_instr(const void *data)
+{
+ const nir_instr *instr = data;
+ uint32_t hash = _mesa_fnv32_1a_offset_bias;
+
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ hash = hash_alu(hash, nir_instr_as_alu(instr));
+ break;
+ case nir_instr_type_load_const:
+ hash = hash_load_const(hash, nir_instr_as_load_const(instr));
+ break;
+ case nir_instr_type_phi:
+ hash = hash_phi(hash, nir_instr_as_phi(instr));
+ break;
+ case nir_instr_type_intrinsic:
+ hash = hash_intrinsic(hash, nir_instr_as_intrinsic(instr));
+ break;
+ case nir_instr_type_tex:
+ hash = hash_tex(hash, nir_instr_as_tex(instr));
+ break;
+ default:
+ unreachable("Invalid instruction type");
+ }
+
+ return hash;
+}
+
+bool
+nir_srcs_equal(nir_src src1, nir_src src2)
+{
+ if (src1.is_ssa) {
+ if (src2.is_ssa) {
+ return src1.ssa == src2.ssa;
+ } else {
+ return false;
+ }
+ } else {
+ if (src2.is_ssa) {
+ return false;
+ } else {
+ if ((src1.reg.indirect == NULL) != (src2.reg.indirect == NULL))
+ return false;
+
+ if (src1.reg.indirect) {
+ if (!nir_srcs_equal(*src1.reg.indirect, *src2.reg.indirect))
+ return false;
+ }
+
+ return src1.reg.reg == src2.reg.reg &&
+ src1.reg.base_offset == src2.reg.base_offset;
+ }
+ }
+}
+
+static bool
+nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2,
+ unsigned src1, unsigned src2)
+{
+ if (alu1->src[src1].abs != alu2->src[src2].abs ||
+ alu1->src[src1].negate != alu2->src[src2].negate)
+ return false;
+
+ for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src1); i++) {
+ if (alu1->src[src1].swizzle[i] != alu2->src[src2].swizzle[i])
+ return false;
+ }
+
+ return nir_srcs_equal(alu1->src[src1].src, alu2->src[src2].src);
+}
+
+/* Returns "true" if two instructions are equal. Note that this will only
+ * work for the subset of instructions defined by instr_can_rewrite(). Also,
+ * it should only return "true" for instructions that hash_instr() will return
+ * the same hash for (ignoring collisions, of course).
+ */
+
+static bool
+nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2)
+{
+ if (instr1->type != instr2->type)
+ return false;
+
+ switch (instr1->type) {
+ case nir_instr_type_alu: {
+ nir_alu_instr *alu1 = nir_instr_as_alu(instr1);
+ nir_alu_instr *alu2 = nir_instr_as_alu(instr2);
+
+ if (alu1->op != alu2->op)
+ return false;
+
+ /* TODO: We can probably acutally do something more inteligent such
+ * as allowing different numbers and taking a maximum or something
+ * here */
+ if (alu1->dest.dest.ssa.num_components != alu2->dest.dest.ssa.num_components)
+ return false;
+
+ if (nir_op_infos[alu1->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
+ assert(nir_op_infos[alu1->op].num_inputs == 2);
+ return (nir_alu_srcs_equal(alu1, alu2, 0, 0) &&
+ nir_alu_srcs_equal(alu1, alu2, 1, 1)) ||
+ (nir_alu_srcs_equal(alu1, alu2, 0, 1) &&
+ nir_alu_srcs_equal(alu1, alu2, 1, 0));
+ } else {
+ for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) {
+ if (!nir_alu_srcs_equal(alu1, alu2, i, i))
+ return false;
+ }
+ }
+ return true;
+ }
+ case nir_instr_type_tex: {
+ nir_tex_instr *tex1 = nir_instr_as_tex(instr1);
+ nir_tex_instr *tex2 = nir_instr_as_tex(instr2);
+
+ if (tex1->op != tex2->op)
+ return false;
+
+ if (tex1->num_srcs != tex2->num_srcs)
+ return false;
+ for (unsigned i = 0; i < tex1->num_srcs; i++) {
+ if (tex1->src[i].src_type != tex2->src[i].src_type ||
+ !nir_srcs_equal(tex1->src[i].src, tex2->src[i].src)) {
+ return false;
+ }
+ }
+
+ if (tex1->coord_components != tex2->coord_components ||
+ tex1->sampler_dim != tex2->sampler_dim ||
+ tex1->is_array != tex2->is_array ||
+ tex1->is_shadow != tex2->is_shadow ||
+ tex1->is_new_style_shadow != tex2->is_new_style_shadow ||
+ memcmp(tex1->const_offset, tex2->const_offset,
+ sizeof(tex1->const_offset)) != 0 ||
+ tex1->component != tex2->component ||
+ tex1->sampler_index != tex2->sampler_index ||
+ tex1->sampler_array_size != tex2->sampler_array_size) {
+ return false;
+ }
+
+ /* Don't support un-lowered sampler derefs currently. */
+ assert(!tex1->sampler && !tex2->sampler);
+
+ return true;
+ }
+ case nir_instr_type_load_const: {
+ nir_load_const_instr *load1 = nir_instr_as_load_const(instr1);
+ nir_load_const_instr *load2 = nir_instr_as_load_const(instr2);
+
+ if (load1->def.num_components != load2->def.num_components)
+ return false;
+
+ return memcmp(load1->value.f, load2->value.f,
+ load1->def.num_components * sizeof(*load2->value.f)) == 0;
+ }
+ case nir_instr_type_phi: {
+ nir_phi_instr *phi1 = nir_instr_as_phi(instr1);
+ nir_phi_instr *phi2 = nir_instr_as_phi(instr2);
+
+ if (phi1->instr.block != phi2->instr.block)
+ return false;
+
+ nir_foreach_phi_src(phi1, src1) {
+ nir_foreach_phi_src(phi2, src2) {
+ if (src1->pred == src2->pred) {
+ if (!nir_srcs_equal(src1->src, src2->src))
+ return false;
+
+ break;
+ }
+ }
+ }
+
+ return true;
+ }
+ case nir_instr_type_intrinsic: {
+ nir_intrinsic_instr *intrinsic1 = nir_instr_as_intrinsic(instr1);
+ nir_intrinsic_instr *intrinsic2 = nir_instr_as_intrinsic(instr2);
+ const nir_intrinsic_info *info =
+ &nir_intrinsic_infos[intrinsic1->intrinsic];
+
+ if (intrinsic1->intrinsic != intrinsic2->intrinsic ||
+ intrinsic1->num_components != intrinsic2->num_components)
+ return false;
+
+ if (info->has_dest && intrinsic1->dest.ssa.num_components !=
+ intrinsic2->dest.ssa.num_components)
+ return false;
+
+ for (unsigned i = 0; i < info->num_srcs; i++) {
+ if (!nir_srcs_equal(intrinsic1->src[i], intrinsic2->src[i]))
+ return false;
+ }
+
+ assert(info->num_variables == 0);
+
+ for (unsigned i = 0; i < info->num_indices; i++) {
+ if (intrinsic1->const_index[i] != intrinsic2->const_index[i])
+ return false;
+ }
+
+ return true;
+ }
+ case nir_instr_type_call:
+ case nir_instr_type_jump:
+ case nir_instr_type_ssa_undef:
+ case nir_instr_type_parallel_copy:
+ default:
+ unreachable("Invalid instruction type");
+ }
+
+ return false;
+}
+
+static bool
+src_is_ssa(nir_src *src, void *data)
+{
+ (void) data;
+ return src->is_ssa;
+}
+
+static bool
+dest_is_ssa(nir_dest *dest, void *data)
+{
+ (void) data;
+ return dest->is_ssa;
+}
+
+/* This function determines if uses of an instruction can safely be rewritten
+ * to use another identical instruction instead. Note that this function must
+ * be kept in sync with hash_instr() and nir_instrs_equal() -- only
+ * instructions that pass this test will be handed on to those functions, and
+ * conversely they must handle everything that this function returns true for.
+ */
+
+static bool
+instr_can_rewrite(nir_instr *instr)
+{
+ /* We only handle SSA. */
+ if (!nir_foreach_dest(instr, dest_is_ssa, NULL) ||
+ !nir_foreach_src(instr, src_is_ssa, NULL))
+ return false;
+
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ case nir_instr_type_load_const:
+ case nir_instr_type_phi:
+ return true;
+ case nir_instr_type_tex: {
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+
+ /* Don't support un-lowered sampler derefs currently. */
+ if (tex->sampler)
+ return false;
+
+ return true;
+ }
+ case nir_instr_type_intrinsic: {
+ const nir_intrinsic_info *info =
+ &nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic];
+ return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
+ (info->flags & NIR_INTRINSIC_CAN_REORDER) &&
+ info->num_variables == 0; /* not implemented yet */
+ }
+ case nir_instr_type_call:
+ case nir_instr_type_jump:
+ case nir_instr_type_ssa_undef:
+ return false;
+ case nir_instr_type_parallel_copy:
+ default:
+ unreachable("Invalid instruction type");
+ }
+
+ return false;
+}
+
+static nir_ssa_def *
+nir_instr_get_dest_ssa_def(nir_instr *instr)
+{
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ assert(nir_instr_as_alu(instr)->dest.dest.is_ssa);
+ return &nir_instr_as_alu(instr)->dest.dest.ssa;
+ case nir_instr_type_load_const:
+ return &nir_instr_as_load_const(instr)->def;
+ case nir_instr_type_phi:
+ assert(nir_instr_as_phi(instr)->dest.is_ssa);
+ return &nir_instr_as_phi(instr)->dest.ssa;
+ case nir_instr_type_intrinsic:
+ assert(nir_instr_as_intrinsic(instr)->dest.is_ssa);
+ return &nir_instr_as_intrinsic(instr)->dest.ssa;
+ case nir_instr_type_tex:
+ assert(nir_instr_as_tex(instr)->dest.is_ssa);
+ return &nir_instr_as_tex(instr)->dest.ssa;
+ default:
+ unreachable("We never ask for any of these");
+ }
+}
+
+static bool
+cmp_func(const void *data1, const void *data2)
+{
+ return nir_instrs_equal(data1, data2);
+}
+
+struct set *
+nir_instr_set_create(void *mem_ctx)
+{
+ return _mesa_set_create(mem_ctx, hash_instr, cmp_func);
+}
+
+void
+nir_instr_set_destroy(struct set *instr_set)
+{
+ _mesa_set_destroy(instr_set, NULL);
+}
+
+bool
+nir_instr_set_add_or_rewrite(struct set *instr_set, nir_instr *instr)
+{
+ if (!instr_can_rewrite(instr))
+ return false;
+
+ struct set_entry *entry = _mesa_set_search(instr_set, instr);
+ if (entry) {
+ nir_ssa_def *def = nir_instr_get_dest_ssa_def(instr);
+ nir_ssa_def *new_def =
+ nir_instr_get_dest_ssa_def((nir_instr *) entry->key);
+ nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(new_def));
+ return true;
+ }
+
+ _mesa_set_add(instr_set, instr);
+ return false;
+}
+
+void
+nir_instr_set_remove(struct set *instr_set, nir_instr *instr)
+{
+ if (!instr_can_rewrite(instr))
+ return;
+
+ struct set_entry *entry = _mesa_set_search(instr_set, instr);
+ if (entry)
+ _mesa_set_remove(instr_set, entry);
+}
+
diff --git a/src/glsl/nir/nir_instr_set.h b/src/glsl/nir/nir_instr_set.h
new file mode 100644
index 00000000000..939e8ddbf58
--- /dev/null
+++ b/src/glsl/nir/nir_instr_set.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright © 2014 Connor Abbott
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#pragma once
+
+#include "nir.h"
+
+/**
+ * This file defines functions for creating, destroying, and manipulating an
+ * "instruction set," which is an abstraction for finding duplicate
+ * instructions using a hash set. Note that the question of whether an
+ * instruction is actually a duplicate (e.g. whether it has any side effects)
+ * is handled transparently. The user can pass any instruction to
+ * nir_instr_set_add_or_rewrite() and nir_instr_set_remove(), and if the
+ * instruction isn't safe to rewrite or isn't supported, it's silently
+ * removed.
+ */
+
+/*@{*/
+
+/** Creates an instruction set, using a given ralloc mem_ctx */
+struct set *nir_instr_set_create(void *mem_ctx);
+
+/** Destroys an instruction set. */
+void nir_instr_set_destroy(struct set *instr_set);
+
+/**
+ * Adds an instruction to an instruction set if it doesn't exist, or if it
+ * does already exist, rewrites all uses of it to point to the other
+ * already-inserted instruction. Returns 'true' if the uses of the instruction
+ * were rewritten.
+ */
+bool nir_instr_set_add_or_rewrite(struct set *instr_set, nir_instr *instr);
+
+/**
+ * Removes an instruction from an instruction set, so that other instructions
+ * won't be merged with it.
+ */
+void nir_instr_set_remove(struct set *instr_set, nir_instr *instr);
+
+/*@}*/
+
diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
index b5a0d715aa3..68a18b9c11a 100644
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -174,8 +174,10 @@ INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0,
* 3: For CompSwap only: the second data parameter.
*/
INTRINSIC(ssbo_atomic_add, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
-INTRINSIC(ssbo_atomic_min, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
-INTRINSIC(ssbo_atomic_max, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_imin, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_umin, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_imax, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_umax, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
INTRINSIC(ssbo_atomic_and, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
INTRINSIC(ssbo_atomic_or, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
INTRINSIC(ssbo_atomic_xor, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
diff --git a/src/glsl/nir/nir_lower_atomics.c b/src/glsl/nir/nir_lower_atomics.c
index 6f9ecc019ec..46e137652a1 100644
--- a/src/glsl/nir/nir_lower_atomics.c
+++ b/src/glsl/nir/nir_lower_atomics.c
@@ -72,20 +72,22 @@ lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl)
nir_ssa_def *offset_def = &offset_const->def;
- if (instr->variables[0]->deref.child != NULL) {
- assert(instr->variables[0]->deref.child->deref_type ==
- nir_deref_type_array);
- nir_deref_array *deref_array =
- nir_deref_as_array(instr->variables[0]->deref.child);
- assert(deref_array->deref.child == NULL);
+ nir_deref *tail = &instr->variables[0]->deref;
+ while (tail->child != NULL) {
+ assert(tail->child->deref_type == nir_deref_type_array);
+ nir_deref_array *deref_array = nir_deref_as_array(tail->child);
+ tail = tail->child;
- offset_const->value.u[0] +=
- deref_array->base_offset * ATOMIC_COUNTER_SIZE;
+ unsigned child_array_elements = tail->child != NULL ?
+ glsl_get_aoa_size(tail->type) : 1;
+
+ offset_const->value.u[0] += deref_array->base_offset *
+ child_array_elements * ATOMIC_COUNTER_SIZE;
if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
nir_load_const_instr *atomic_counter_size =
nir_load_const_instr_create(mem_ctx, 1);
- atomic_counter_size->value.u[0] = ATOMIC_COUNTER_SIZE;
+ atomic_counter_size->value.u[0] = child_array_elements * ATOMIC_COUNTER_SIZE;
nir_instr_insert_before(&instr->instr, &atomic_counter_size->instr);
nir_alu_instr *mul = nir_alu_instr_create(mem_ctx, nir_op_imul);
@@ -102,7 +104,7 @@ lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl)
add->src[0].src.is_ssa = true;
add->src[0].src.ssa = &mul->dest.dest.ssa;
add->src[1].src.is_ssa = true;
- add->src[1].src.ssa = &offset_const->def;
+ add->src[1].src.ssa = offset_def;
nir_instr_insert_before(&instr->instr, &add->instr);
offset_def = &add->dest.dest.ssa;
diff --git a/src/glsl/nir/nir_opt_cse.c b/src/glsl/nir/nir_opt_cse.c
index 64c94afd480..93a6635337a 100644
--- a/src/glsl/nir/nir_opt_cse.c
+++ b/src/glsl/nir/nir_opt_cse.c
@@ -22,306 +22,60 @@
*
* Authors:
* Jason Ekstrand ([email protected])
+ * Connor Abbott ([email protected])
*
*/
-#include "nir.h"
+#include "nir_instr_set.h"
/*
* Implements common subexpression elimination
*/
-struct cse_state {
- void *mem_ctx;
- bool progress;
-};
-
-static bool
-nir_alu_srcs_equal(nir_alu_instr *alu1, nir_alu_instr *alu2, unsigned src1,
- unsigned src2)
-{
- if (alu1->src[src1].abs != alu2->src[src2].abs ||
- alu1->src[src1].negate != alu2->src[src2].negate)
- return false;
-
- for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src1); i++) {
- if (alu1->src[src1].swizzle[i] != alu2->src[src2].swizzle[i])
- return false;
- }
-
- return nir_srcs_equal(alu1->src[src1].src, alu2->src[src2].src);
-}
-
-static bool
-nir_instrs_equal(nir_instr *instr1, nir_instr *instr2)
-{
- if (instr1->type != instr2->type)
- return false;
-
- switch (instr1->type) {
- case nir_instr_type_alu: {
- nir_alu_instr *alu1 = nir_instr_as_alu(instr1);
- nir_alu_instr *alu2 = nir_instr_as_alu(instr2);
-
- if (alu1->op != alu2->op)
- return false;
-
- /* TODO: We can probably acutally do something more inteligent such
- * as allowing different numbers and taking a maximum or something
- * here */
- if (alu1->dest.dest.ssa.num_components != alu2->dest.dest.ssa.num_components)
- return false;
-
- if (nir_op_infos[alu1->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
- assert(nir_op_infos[alu1->op].num_inputs == 2);
- return (nir_alu_srcs_equal(alu1, alu2, 0, 0) &&
- nir_alu_srcs_equal(alu1, alu2, 1, 1)) ||
- (nir_alu_srcs_equal(alu1, alu2, 0, 1) &&
- nir_alu_srcs_equal(alu1, alu2, 1, 0));
- } else {
- for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) {
- if (!nir_alu_srcs_equal(alu1, alu2, i, i))
- return false;
- }
- }
- return true;
- }
- case nir_instr_type_tex: {
- nir_tex_instr *tex1 = nir_instr_as_tex(instr1);
- nir_tex_instr *tex2 = nir_instr_as_tex(instr2);
-
- if (tex1->op != tex2->op)
- return false;
-
- if (tex1->num_srcs != tex2->num_srcs)
- return false;
- for (unsigned i = 0; i < tex1->num_srcs; i++) {
- if (tex1->src[i].src_type != tex2->src[i].src_type ||
- !nir_srcs_equal(tex1->src[i].src, tex2->src[i].src)) {
- return false;
- }
- }
-
- if (tex1->coord_components != tex2->coord_components ||
- tex1->sampler_dim != tex2->sampler_dim ||
- tex1->is_array != tex2->is_array ||
- tex1->is_shadow != tex2->is_shadow ||
- tex1->is_new_style_shadow != tex2->is_new_style_shadow ||
- memcmp(tex1->const_offset, tex2->const_offset,
- sizeof(tex1->const_offset)) != 0 ||
- tex1->component != tex2->component ||
- tex1->sampler_index != tex2->sampler_index ||
- tex1->sampler_array_size != tex2->sampler_array_size) {
- return false;
- }
-
- /* Don't support un-lowered sampler derefs currently. */
- if (tex1->sampler || tex2->sampler)
- return false;
-
- return true;
- }
- case nir_instr_type_load_const: {
- nir_load_const_instr *load1 = nir_instr_as_load_const(instr1);
- nir_load_const_instr *load2 = nir_instr_as_load_const(instr2);
-
- if (load1->def.num_components != load2->def.num_components)
- return false;
-
- return memcmp(load1->value.f, load2->value.f,
- load1->def.num_components * sizeof(*load2->value.f)) == 0;
- }
- case nir_instr_type_phi: {
- nir_phi_instr *phi1 = nir_instr_as_phi(instr1);
- nir_phi_instr *phi2 = nir_instr_as_phi(instr2);
-
- if (phi1->instr.block != phi2->instr.block)
- return false;
-
- nir_foreach_phi_src(phi1, src1) {
- nir_foreach_phi_src(phi2, src2) {
- if (src1->pred == src2->pred) {
- if (!nir_srcs_equal(src1->src, src2->src))
- return false;
-
- break;
- }
- }
- }
-
- return true;
- }
- case nir_instr_type_intrinsic: {
- nir_intrinsic_instr *intrinsic1 = nir_instr_as_intrinsic(instr1);
- nir_intrinsic_instr *intrinsic2 = nir_instr_as_intrinsic(instr2);
- const nir_intrinsic_info *info =
- &nir_intrinsic_infos[intrinsic1->intrinsic];
-
- if (intrinsic1->intrinsic != intrinsic2->intrinsic ||
- intrinsic1->num_components != intrinsic2->num_components)
- return false;
-
- if (info->has_dest && intrinsic1->dest.ssa.num_components !=
- intrinsic2->dest.ssa.num_components)
- return false;
-
- for (unsigned i = 0; i < info->num_srcs; i++) {
- if (!nir_srcs_equal(intrinsic1->src[i], intrinsic2->src[i]))
- return false;
- }
-
- assert(info->num_variables == 0);
-
- for (unsigned i = 0; i < info->num_indices; i++) {
- if (intrinsic1->const_index[i] != intrinsic2->const_index[i])
- return false;
- }
-
- return true;
- }
- case nir_instr_type_call:
- case nir_instr_type_jump:
- case nir_instr_type_ssa_undef:
- case nir_instr_type_parallel_copy:
- default:
- unreachable("Invalid instruction type");
- }
-
- return false;
-}
-
-static bool
-src_is_ssa(nir_src *src, void *data)
-{
- (void) data;
- return src->is_ssa;
-}
-
-static bool
-dest_is_ssa(nir_dest *dest, void *data)
-{
- (void) data;
- return dest->is_ssa;
-}
+/*
+ * Visits and CSE's the given block and all its descendants in the dominance
+ * tree recursively. Note that the instr_set is guaranteed to only ever
+ * contain instructions that dominate the current block.
+ */
static bool
-nir_instr_can_cse(nir_instr *instr)
-{
- /* We only handle SSA. */
- if (!nir_foreach_dest(instr, dest_is_ssa, NULL) ||
- !nir_foreach_src(instr, src_is_ssa, NULL))
- return false;
-
- switch (instr->type) {
- case nir_instr_type_alu:
- case nir_instr_type_tex:
- case nir_instr_type_load_const:
- case nir_instr_type_phi:
- return true;
- case nir_instr_type_intrinsic: {
- const nir_intrinsic_info *info =
- &nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic];
- return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
- (info->flags & NIR_INTRINSIC_CAN_REORDER) &&
- info->num_variables == 0; /* not implemented yet */
- }
- case nir_instr_type_call:
- case nir_instr_type_jump:
- case nir_instr_type_ssa_undef:
- return false;
- case nir_instr_type_parallel_copy:
- default:
- unreachable("Invalid instruction type");
- }
-
- return false;
-}
-
-static nir_ssa_def *
-nir_instr_get_dest_ssa_def(nir_instr *instr)
+cse_block(nir_block *block, struct set *instr_set)
{
- switch (instr->type) {
- case nir_instr_type_alu:
- assert(nir_instr_as_alu(instr)->dest.dest.is_ssa);
- return &nir_instr_as_alu(instr)->dest.dest.ssa;
- case nir_instr_type_tex:
- assert(nir_instr_as_tex(instr)->dest.is_ssa);
- return &nir_instr_as_tex(instr)->dest.ssa;
- case nir_instr_type_load_const:
- return &nir_instr_as_load_const(instr)->def;
- case nir_instr_type_phi:
- assert(nir_instr_as_phi(instr)->dest.is_ssa);
- return &nir_instr_as_phi(instr)->dest.ssa;
- case nir_instr_type_intrinsic:
- assert(nir_instr_as_intrinsic(instr)->dest.is_ssa);
- return &nir_instr_as_intrinsic(instr)->dest.ssa;
- default:
- unreachable("We never ask for any of these");
- }
-}
-
-static void
-nir_opt_cse_instr(nir_instr *instr, struct cse_state *state)
-{
- if (!nir_instr_can_cse(instr))
- return;
+ bool progress = false;
- for (struct exec_node *node = instr->node.prev;
- !exec_node_is_head_sentinel(node); node = node->prev) {
- nir_instr *other = exec_node_data(nir_instr, node, node);
- if (nir_instrs_equal(instr, other)) {
- nir_ssa_def *other_def = nir_instr_get_dest_ssa_def(other);
- nir_ssa_def_rewrite_uses(nir_instr_get_dest_ssa_def(instr),
- nir_src_for_ssa(other_def));
+ nir_foreach_instr_safe(block, instr) {
+ if (nir_instr_set_add_or_rewrite(instr_set, instr)) {
+ progress = true;
nir_instr_remove(instr);
- state->progress = true;
- return;
}
}
- for (nir_block *block = instr->block->imm_dom;
- block != NULL; block = block->imm_dom) {
- nir_foreach_instr_reverse(block, other) {
- if (nir_instrs_equal(instr, other)) {
- nir_ssa_def *other_def = nir_instr_get_dest_ssa_def(other);
- nir_ssa_def_rewrite_uses(nir_instr_get_dest_ssa_def(instr),
- nir_src_for_ssa(other_def));
- nir_instr_remove(instr);
- state->progress = true;
- return;
- }
- }
+ for (unsigned i = 0; i < block->num_dom_children; i++) {
+ nir_block *child = block->dom_children[i];
+ progress |= cse_block(child, instr_set);
}
-}
-
-static bool
-nir_opt_cse_block(nir_block *block, void *void_state)
-{
- struct cse_state *state = void_state;
- nir_foreach_instr_safe(block, instr)
- nir_opt_cse_instr(instr, state);
+ nir_foreach_instr(block, instr)
+ nir_instr_set_remove(instr_set, instr);
- return true;
+ return progress;
}
static bool
nir_opt_cse_impl(nir_function_impl *impl)
{
- struct cse_state state;
-
- state.mem_ctx = ralloc_parent(impl);
- state.progress = false;
+ struct set *instr_set = nir_instr_set_create(NULL);
nir_metadata_require(impl, nir_metadata_dominance);
- nir_foreach_block(impl, nir_opt_cse_block, &state);
+ bool progress = cse_block(nir_start_block(impl), instr_set);
- if (state.progress)
+ if (progress)
nir_metadata_preserve(impl, nir_metadata_block_index |
nir_metadata_dominance);
- return state.progress;
+ nir_instr_set_destroy(instr_set);
+ return progress;
}
bool
@@ -336,3 +90,4 @@ nir_opt_cse(nir_shader *shader)
return progress;
}
+
diff --git a/src/glsl/nir/nir_sweep.c b/src/glsl/nir/nir_sweep.c
index b6ce43b5224..5a22f509f50 100644
--- a/src/glsl/nir/nir_sweep.c
+++ b/src/glsl/nir/nir_sweep.c
@@ -155,6 +155,8 @@ nir_sweep(nir_shader *nir)
ralloc_adopt(rubbish, nir);
ralloc_steal(nir, (char *)nir->info.name);
+ if (nir->info.label)
+ ralloc_steal(nir, (char *)nir->info.label);
/* Variables and registers are not dead. Steal them back. */
steal_list(nir, nir_variable, &nir->uniforms);
diff --git a/src/glsl/nir/nir_types.cpp b/src/glsl/nir/nir_types.cpp
index 01f0e9b5abc..4a1250e546c 100644
--- a/src/glsl/nir/nir_types.cpp
+++ b/src/glsl/nir/nir_types.cpp
@@ -118,6 +118,12 @@ glsl_get_length(const struct glsl_type *type)
return type->is_matrix() ? type->matrix_columns : type->length;
}
+unsigned
+glsl_get_aoa_size(const struct glsl_type *type)
+{
+ return type->arrays_of_arrays_size();
+}
+
const char *
glsl_get_struct_elem_name(const struct glsl_type *type, unsigned index)
{
diff --git a/src/glsl/nir/nir_types.h b/src/glsl/nir/nir_types.h
index 1a0cb1fb774..a61af6cba75 100644
--- a/src/glsl/nir/nir_types.h
+++ b/src/glsl/nir/nir_types.h
@@ -31,7 +31,7 @@
/* C wrapper around glsl_types.h */
-#include "../glsl_types.h"
+#include "glsl_types.h"
#ifdef __cplusplus
extern "C" {
@@ -65,6 +65,8 @@ unsigned glsl_get_matrix_columns(const struct glsl_type *type);
unsigned glsl_get_length(const struct glsl_type *type);
+unsigned glsl_get_aoa_size(const struct glsl_type *type);
+
const char *glsl_get_struct_elem_name(const struct glsl_type *type,
unsigned index);
diff --git a/src/glsl/shader_enums.c b/src/glsl/nir/shader_enums.c
index c196b791d4f..66a25e72344 100644
--- a/src/glsl/shader_enums.c
+++ b/src/glsl/nir/shader_enums.c
@@ -26,8 +26,9 @@
* Rob Clark <[email protected]>
*/
-#include "glsl/shader_enums.h"
+#include "shader_enums.h"
#include "util/macros.h"
+#include "mesa/main/config.h"
#define ENUM(x) [x] = #x
#define NAME(val) ((((val) < ARRAY_SIZE(names)) && names[(val)]) ? names[(val)] : "UNKNOWN")
@@ -42,6 +43,7 @@ const char * gl_shader_stage_name(gl_shader_stage stage)
ENUM(MESA_SHADER_FRAGMENT),
ENUM(MESA_SHADER_COMPUTE),
};
+ STATIC_ASSERT(ARRAY_SIZE(names) == MESA_SHADER_STAGES);
return NAME(stage);
}
@@ -82,6 +84,7 @@ const char * gl_vert_attrib_name(gl_vert_attrib attrib)
ENUM(VERT_ATTRIB_GENERIC14),
ENUM(VERT_ATTRIB_GENERIC15),
};
+ STATIC_ASSERT(ARRAY_SIZE(names) == VERT_ATTRIB_MAX);
return NAME(attrib);
}
@@ -147,6 +150,7 @@ const char * gl_varying_slot_name(gl_varying_slot slot)
ENUM(VARYING_SLOT_VAR30),
ENUM(VARYING_SLOT_VAR31),
};
+ STATIC_ASSERT(ARRAY_SIZE(names) == VARYING_SLOT_MAX);
return NAME(slot);
}
@@ -169,8 +173,10 @@ const char * gl_system_value_name(gl_system_value sysval)
ENUM(SYSTEM_VALUE_TESS_LEVEL_INNER),
ENUM(SYSTEM_VALUE_LOCAL_INVOCATION_ID),
ENUM(SYSTEM_VALUE_WORK_GROUP_ID),
+ ENUM(SYSTEM_VALUE_NUM_WORK_GROUPS),
ENUM(SYSTEM_VALUE_VERTEX_CNT),
};
+ STATIC_ASSERT(ARRAY_SIZE(names) == SYSTEM_VALUE_MAX);
return NAME(sysval);
}
@@ -182,6 +188,7 @@ const char * glsl_interp_qualifier_name(enum glsl_interp_qualifier qual)
ENUM(INTERP_QUALIFIER_FLAT),
ENUM(INTERP_QUALIFIER_NOPERSPECTIVE),
};
+ STATIC_ASSERT(ARRAY_SIZE(names) == INTERP_QUALIFIER_COUNT);
return NAME(qual);
}
@@ -201,5 +208,6 @@ const char * gl_frag_result_name(gl_frag_result result)
ENUM(FRAG_RESULT_DATA6),
ENUM(FRAG_RESULT_DATA7),
};
+ STATIC_ASSERT(ARRAY_SIZE(names) == FRAG_RESULT_MAX);
return NAME(result);
}
diff --git a/src/glsl/shader_enums.h b/src/glsl/nir/shader_enums.h
index 2a5d2c5bfa7..d1cf7ca04cc 100644
--- a/src/glsl/shader_enums.h
+++ b/src/glsl/nir/shader_enums.h
@@ -233,6 +233,11 @@ typedef enum
VARYING_SLOT_VAR31,
} gl_varying_slot;
+
+#define VARYING_SLOT_MAX (VARYING_SLOT_VAR0 + MAX_VARYING)
+#define VARYING_SLOT_PATCH0 (VARYING_SLOT_MAX)
+#define VARYING_SLOT_TESS_MAX (VARYING_SLOT_PATCH0 + MAX_VARYING)
+
const char * gl_varying_slot_name(gl_varying_slot slot);
/**
@@ -473,4 +478,23 @@ typedef enum
const char * gl_frag_result_name(gl_frag_result result);
+#define FRAG_RESULT_MAX (FRAG_RESULT_DATA0 + MAX_DRAW_BUFFERS)
+
+/**
+ * \brief Layout qualifiers for gl_FragDepth.
+ *
+ * Extension AMD_conservative_depth allows gl_FragDepth to be redeclared with
+ * a layout qualifier.
+ *
+ * \see enum ir_depth_layout
+ */
+enum gl_frag_depth_layout
+{
+ FRAG_DEPTH_LAYOUT_NONE, /**< No layout is specified. */
+ FRAG_DEPTH_LAYOUT_ANY,
+ FRAG_DEPTH_LAYOUT_GREATER,
+ FRAG_DEPTH_LAYOUT_LESS,
+ FRAG_DEPTH_LAYOUT_UNCHANGED
+};
+
#endif /* SHADER_ENUMS_H */
diff --git a/src/glsl/opt_dead_code.cpp b/src/glsl/opt_dead_code.cpp
index 2cb7f41adef..c5be166e75a 100644
--- a/src/glsl/opt_dead_code.cpp
+++ b/src/glsl/opt_dead_code.cpp
@@ -75,24 +75,35 @@ do_dead_code(exec_list *instructions, bool uniform_locations_assigned)
|| !entry->declaration)
continue;
- if (entry->assign) {
- /* Remove a single dead assignment to the variable we found.
- * Don't do so if it's a shader or function output or a shader
- * storage variable though.
+ if (!entry->assign_list.is_empty()) {
+ /* Remove all the dead assignments to the variable we found.
+ * Don't do so if it's a shader or function output, though.
*/
if (entry->var->data.mode != ir_var_function_out &&
entry->var->data.mode != ir_var_function_inout &&
entry->var->data.mode != ir_var_shader_out &&
entry->var->data.mode != ir_var_shader_storage) {
- entry->assign->remove();
- progress = true;
- if (debug) {
- printf("Removed assignment to %s@%p\n",
- entry->var->name, (void *) entry->var);
- }
+ while (!entry->assign_list.is_empty()) {
+ struct assignment_entry *assignment_entry =
+ exec_node_data(struct assignment_entry,
+ entry->assign_list.head, link);
+
+ assignment_entry->assign->remove();
+
+ if (debug) {
+ printf("Removed assignment to %s@%p\n",
+ entry->var->name, (void *) entry->var);
+ }
+
+ assignment_entry->link.remove();
+ free(assignment_entry);
+ }
+ progress = true;
}
- } else {
+ }
+
+ if (entry->assign_list.is_empty()) {
/* If there are no assignments or references to the variable left,
* then we can remove its declaration.
*/
@@ -103,7 +114,7 @@ do_dead_code(exec_list *instructions, bool uniform_locations_assigned)
*/
if (entry->var->data.mode == ir_var_uniform ||
entry->var->data.mode == ir_var_shader_storage) {
- if (uniform_locations_assigned || entry->var->constant_value)
+ if (uniform_locations_assigned || entry->var->constant_initializer)
continue;
/* Section 2.11.6 (Uniform Variables) of the OpenGL ES 3.0.3 spec
diff --git a/src/glsl/opt_tree_grafting.cpp b/src/glsl/opt_tree_grafting.cpp
index a7a219c55ca..e38a0e93058 100644
--- a/src/glsl/opt_tree_grafting.cpp
+++ b/src/glsl/opt_tree_grafting.cpp
@@ -373,8 +373,6 @@ tree_grafting_basic_block(ir_instruction *bb_first,
entry->referenced_count != 2)
continue;
- assert(assign == entry->assign);
-
/* Found a possibly graftable assignment. Now, walk through the
* rest of the BB seeing if the deref is here, and if nothing interfered with
* pasting its expression's values in between.
diff --git a/src/glsl/standalone_scaffolding.cpp b/src/glsl/standalone_scaffolding.cpp
index 05140192893..3a95360eda6 100644
--- a/src/glsl/standalone_scaffolding.cpp
+++ b/src/glsl/standalone_scaffolding.cpp
@@ -113,9 +113,18 @@ _mesa_clear_shader_program_data(struct gl_shader_program *shProg)
ralloc_free(shProg->InfoLog);
shProg->InfoLog = ralloc_strdup(shProg, "");
+ ralloc_free(shProg->BufferInterfaceBlocks);
+ shProg->BufferInterfaceBlocks = NULL;
+ shProg->NumBufferInterfaceBlocks = 0;
+
ralloc_free(shProg->UniformBlocks);
shProg->UniformBlocks = NULL;
- shProg->NumBufferInterfaceBlocks = 0;
+ shProg->NumUniformBlocks = 0;
+
+ ralloc_free(shProg->ShaderStorageBlocks);
+ shProg->ShaderStorageBlocks = NULL;
+ shProg->NumShaderStorageBlocks = 0;
+
for (i = 0; i < MESA_SHADER_STAGES; i++) {
ralloc_free(shProg->UniformBlockStageIndex[i]);
shProg->UniformBlockStageIndex[i] = NULL;
diff --git a/src/mesa/Android.libmesa_dricore.mk b/src/mesa/Android.libmesa_dricore.mk
index 2e308b83733..cd31e148222 100644
--- a/src/mesa/Android.libmesa_dricore.mk
+++ b/src/mesa/Android.libmesa_dricore.mk
@@ -50,7 +50,7 @@ endif # MESA_ENABLE_ASM
ifeq ($(ARCH_X86_HAVE_SSE4_1),true)
LOCAL_SRC_FILES += \
main/streaming-load-memcpy.c \
- mesa/main/sse_minmax.c
+ main/sse_minmax.c
LOCAL_CFLAGS := \
-msse4.1 \
-DUSE_SSE41
@@ -60,6 +60,7 @@ LOCAL_C_INCLUDES := \
$(MESA_TOP)/src/mapi \
$(MESA_TOP)/src/mesa/main \
$(MESA_TOP)/src/glsl \
+ $(MESA_TOP)/src/glsl/nir \
$(MESA_TOP)/src/gallium/include \
$(MESA_TOP)/src/gallium/auxiliary
diff --git a/src/mesa/Android.libmesa_glsl_utils.mk b/src/mesa/Android.libmesa_glsl_utils.mk
index ed620ac648c..9e150eaa3c0 100644
--- a/src/mesa/Android.libmesa_glsl_utils.mk
+++ b/src/mesa/Android.libmesa_glsl_utils.mk
@@ -37,6 +37,7 @@ LOCAL_MODULE := libmesa_glsl_utils
LOCAL_C_INCLUDES := \
$(MESA_TOP)/src/glsl \
+ $(MESA_TOP)/src/glsl/nir \
$(MESA_TOP)/src/mapi \
$(MESA_TOP)/src/gallium/include \
$(MESA_TOP)/src/gallium/auxiliary
@@ -62,6 +63,7 @@ LOCAL_CFLAGS := -D_POSIX_C_SOURCE=199309L
LOCAL_C_INCLUDES := \
$(MESA_TOP)/src/glsl \
+ $(MESA_TOP)/src/glsl/nir \
$(MESA_TOP)/src/mapi \
$(MESA_TOP)/src/gallium/include \
$(MESA_TOP)/src/gallium/auxiliary
diff --git a/src/mesa/Android.libmesa_st_mesa.mk b/src/mesa/Android.libmesa_st_mesa.mk
index b4b7fd97722..427a35f4f6e 100644
--- a/src/mesa/Android.libmesa_st_mesa.mk
+++ b/src/mesa/Android.libmesa_st_mesa.mk
@@ -55,6 +55,7 @@ LOCAL_C_INCLUDES := \
$(MESA_TOP)/src/mapi \
$(MESA_TOP)/src/mesa/main \
$(MESA_TOP)/src/glsl \
+ $(MESA_TOP)/src/glsl/nir \
$(MESA_TOP)/src/gallium/auxiliary \
$(MESA_TOP)/src/gallium/include
diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index 0915594cea6..34fb4461985 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -415,6 +415,7 @@ STATETRACKER_FILES = \
state_tracker/st_cache.h \
state_tracker/st_cb_bitmap.c \
state_tracker/st_cb_bitmap.h \
+ state_tracker/st_cb_bitmap_shader.c \
state_tracker/st_cb_blit.c \
state_tracker/st_cb_blit.h \
state_tracker/st_cb_bufferobjects.c \
@@ -425,6 +426,7 @@ STATETRACKER_FILES = \
state_tracker/st_cb_condrender.h \
state_tracker/st_cb_drawpixels.c \
state_tracker/st_cb_drawpixels.h \
+ state_tracker/st_cb_drawpixels_shader.c \
state_tracker/st_cb_drawtex.c \
state_tracker/st_cb_drawtex.h \
state_tracker/st_cb_eglimage.c \
@@ -525,9 +527,7 @@ PROGRAM_FILES = \
program/sampler.h \
program/string_to_uint_map.cpp \
program/symbol_table.c \
- program/symbol_table.h \
- ../glsl/shader_enums.c \
- ../glsl/shader_enums.h
+ program/symbol_table.h
PROGRAM_NIR_FILES = \
program/prog_to_nir.c \
@@ -620,6 +620,7 @@ INCLUDE_DIRS = \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src \
-I$(top_srcdir)/src/glsl \
+ -I$(top_srcdir)/src/glsl/nir \
-I$(top_builddir)/src/glsl \
-I$(top_builddir)/src/glsl/nir \
-I$(top_srcdir)/src/glsl/glcpp \
diff --git a/src/mesa/SConscript b/src/mesa/SConscript
index 5b80a216fef..c986326d2bf 100644
--- a/src/mesa/SConscript
+++ b/src/mesa/SConscript
@@ -16,6 +16,7 @@ env.Append(CPPPATH = [
'#/src',
'#/src/mapi',
'#/src/glsl',
+ '#/src/glsl/nir',
'#/src/mesa',
'#/src/gallium/include',
'#/src/gallium/auxiliary',
diff --git a/src/mesa/drivers/common/meta_copy_image.c b/src/mesa/drivers/common/meta_copy_image.c
index 33490ee6615..04b9cafe308 100644
--- a/src/mesa/drivers/common/meta_copy_image.c
+++ b/src/mesa/drivers/common/meta_copy_image.c
@@ -108,7 +108,11 @@ make_view(struct gl_context *ctx, struct gl_texture_image *tex_image,
return false;
}
+ assert(tex_obj->Target != 0);
+ assert(tex_obj->TargetIndex < NUM_TEXTURE_TARGETS);
+
view_tex_obj->Target = tex_obj->Target;
+ view_tex_obj->TargetIndex = tex_obj->TargetIndex;
*view_tex_image = _mesa_get_tex_image(ctx, view_tex_obj, tex_obj->Target, 0);
@@ -129,7 +133,6 @@ make_view(struct gl_context *ctx, struct gl_texture_image *tex_image,
view_tex_obj->NumLayers = tex_obj->NumLayers;
view_tex_obj->Immutable = tex_obj->Immutable;
view_tex_obj->ImmutableLevels = tex_obj->ImmutableLevels;
- view_tex_obj->Target = tex_obj->Target;
if (ctx->Driver.TextureView != NULL &&
!ctx->Driver.TextureView(ctx, view_tex_obj, tex_obj)) {
diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c
index 1a5943c87fb..59d795998c6 100644
--- a/src/mesa/drivers/dri/i915/i915_fragprog.c
+++ b/src/mesa/drivers/dri/i915/i915_fragprog.c
@@ -1315,9 +1315,10 @@ static struct gl_program *
i915NewProgram(struct gl_context * ctx, GLenum target, GLuint id)
{
switch (target) {
- case GL_VERTEX_PROGRAM_ARB:
- return _mesa_init_vertex_program(ctx, CALLOC_STRUCT(gl_vertex_program),
- target, id);
+ case GL_VERTEX_PROGRAM_ARB: {
+ struct gl_vertex_program *prog = CALLOC_STRUCT(gl_vertex_program);
+ return _mesa_init_gl_program(&prog->Base, target, id);
+ }
case GL_FRAGMENT_PROGRAM_ARB:{
struct i915_fragment_program *prog =
@@ -1325,8 +1326,7 @@ i915NewProgram(struct gl_context * ctx, GLenum target, GLuint id)
if (prog) {
i915_init_program(I915_CONTEXT(ctx), prog);
- return _mesa_init_fragment_program(ctx, &prog->FragProg,
- target, id);
+ return _mesa_init_gl_program(&prog->FragProg.Base, target, id);
}
else
return NULL;
diff --git a/src/mesa/drivers/dri/i965/Android.mk b/src/mesa/drivers/dri/i965/Android.mk
index a9b963a9eca..d30a053e10f 100644
--- a/src/mesa/drivers/dri/i965/Android.mk
+++ b/src/mesa/drivers/dri/i965/Android.mk
@@ -48,6 +48,7 @@ LOCAL_C_INCLUDES := \
$(MESA_DRI_C_INCLUDES)
LOCAL_SRC_FILES := \
+ $(i965_compiler_FILES) \
$(i965_FILES)
LOCAL_WHOLE_STATIC_LIBRARIES := \
diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am
index 2e241511049..04b3f9cc8ce 100644
--- a/src/mesa/drivers/dri/i965/Makefile.am
+++ b/src/mesa/drivers/dri/i965/Makefile.am
@@ -33,6 +33,7 @@ AM_CFLAGS = \
-I$(top_srcdir)/src/mesa/drivers/dri/common \
-I$(top_srcdir)/src/mesa/drivers/dri/intel/server \
-I$(top_srcdir)/src/gtest/include \
+ -I$(top_srcdir)/src/glsl/nir \
-I$(top_builddir)/src/glsl/nir \
-I$(top_builddir)/src/mesa/drivers/dri/common \
$(DEFINES) \
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index b242ab55aae..ccd540dabca 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -1,6 +1,7 @@
i965_compiler_FILES = \
brw_cfg.cpp \
brw_cfg.h \
+ brw_compiler.h \
brw_cubemap_normalize.cpp \
brw_dead_control_flow.cpp \
brw_dead_control_flow.h \
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
index d458ad846bf..5308d175416 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
@@ -32,7 +32,7 @@ brw_blorp_eu_emitter::brw_blorp_eu_emitter(struct brw_context *brw,
generator(brw->intelScreen->compiler, brw,
mem_ctx, (void *) rzalloc(mem_ctx, struct brw_wm_prog_key),
(struct brw_stage_prog_data *) rzalloc(mem_ctx, struct brw_wm_prog_data),
- NULL, 0, false, "BLORP")
+ 0, false, "BLORP")
{
if (debug_flag)
generator.enable_debug("blorp");
diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp b/src/mesa/drivers/dri/i965/brw_cfg.cpp
index 91d53eff5a7..10bcd4bafd4 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp
@@ -305,6 +305,10 @@ cfg_t::cfg_t(exec_list *instructions)
assert(cur_do != NULL && cur_while != NULL);
cur->add_successor(mem_ctx, cur_do);
+
+ if (inst->predicate)
+ cur->add_successor(mem_ctx, cur_while);
+
set_next_block(&cur, cur_while, ip);
/* Pop the stack so we're in the previous loop */
@@ -422,7 +426,11 @@ cfg_t::dump(backend_shader *s)
calculate_idom();
foreach_block (block, this) {
- fprintf(stderr, "START B%d IDOM(B%d)", block->num, block->idom->num);
+ if (block->idom)
+ fprintf(stderr, "START B%d IDOM(B%d)", block->num, block->idom->num);
+ else
+ fprintf(stderr, "START B%d IDOM(none)", block->num);
+
foreach_list_typed(bblock_link, link, link, &block->parents) {
fprintf(stderr, " <-B%d",
link->block->num);
diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c
index 17a745d0373..b0119558c3a 100644
--- a/src/mesa/drivers/dri/i965/brw_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_clear.c
@@ -241,7 +241,7 @@ brw_clear(struct gl_context *ctx, GLbitfield mask)
}
/* Clear color buffers with fast clear or at least rep16 writes. */
- if (brw->gen >= 6 && brw->gen < 9 && (mask & BUFFER_BITS_COLOR)) {
+ if (brw->gen >= 6 && (mask & BUFFER_BITS_COLOR)) {
if (brw_meta_fast_clear(brw, fb, mask, partial_clear)) {
debug_mask("blorp color", mask & BUFFER_BITS_COLOR);
mask &= ~BUFFER_BITS_COLOR;
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
new file mode 100644
index 00000000000..11c485d2f08
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -0,0 +1,661 @@
+/*
+ * Copyright © 2010 - 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#pragma once
+
+#include "brw_device_info.h"
+#include "main/mtypes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct ra_regs;
+struct nir_shader;
+struct brw_geometry_program;
+union gl_constant_value;
+
+struct brw_compiler {
+ const struct brw_device_info *devinfo;
+
+ struct {
+ struct ra_regs *regs;
+
+ /**
+ * Array of the ra classes for the unaligned contiguous register
+ * block sizes used.
+ */
+ int *classes;
+
+ /**
+ * Mapping for register-allocated objects in *regs to the first
+ * GRF for that object.
+ */
+ uint8_t *ra_reg_to_grf;
+ } vec4_reg_set;
+
+ struct {
+ struct ra_regs *regs;
+
+ /**
+ * Array of the ra classes for the unaligned contiguous register
+ * block sizes used, indexed by register size.
+ */
+ int classes[16];
+
+ /**
+ * Mapping from classes to ra_reg ranges. Each of the per-size
+ * classes corresponds to a range of ra_reg nodes. This array stores
+ * those ranges in the form of first ra_reg in each class and the
+ * total number of ra_reg elements in the last array element. This
+ * way the range of the i'th class is given by:
+ * [ class_to_ra_reg_range[i], class_to_ra_reg_range[i+1] )
+ */
+ int class_to_ra_reg_range[17];
+
+ /**
+ * Mapping for register-allocated objects in *regs to the first
+ * GRF for that object.
+ */
+ uint8_t *ra_reg_to_grf;
+
+ /**
+ * ra class for the aligned pairs we use for PLN, which doesn't
+ * appear in *classes.
+ */
+ int aligned_pairs_class;
+ } fs_reg_sets[2];
+
+ void (*shader_debug_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
+ void (*shader_perf_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
+
+ bool scalar_vs;
+ struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES];
+};
+
+
+/**
+ * Program key structures.
+ *
+ * When drawing, we look for the currently bound shaders in the program
+ * cache. This is essentially a hash table lookup, and these are the keys.
+ *
+ * Sometimes OpenGL features specified as state need to be simulated via
+ * shader code, due to a mismatch between the API and the hardware. This
+ * is often referred to as "non-orthagonal state" or "NOS". We store NOS
+ * in the program key so it's considered when searching for a program. If
+ * we haven't seen a particular combination before, we have to recompile a
+ * new specialized version.
+ *
+ * Shader compilation should not look up state in gl_context directly, but
+ * instead use the copy in the program key. This guarantees recompiles will
+ * happen correctly.
+ *
+ * @{
+ */
+
+enum PACKED gen6_gather_sampler_wa {
+ WA_SIGN = 1, /* whether we need to sign extend */
+ WA_8BIT = 2, /* if we have an 8bit format needing wa */
+ WA_16BIT = 4, /* if we have a 16bit format needing wa */
+};
+
+/**
+ * Sampler information needed by VS, WM, and GS program cache keys.
+ */
+struct brw_sampler_prog_key_data {
+ /**
+ * EXT_texture_swizzle and DEPTH_TEXTURE_MODE swizzles.
+ */
+ uint16_t swizzles[MAX_SAMPLERS];
+
+ uint32_t gl_clamp_mask[3];
+
+ /**
+ * For RG32F, gather4's channel select is broken.
+ */
+ uint32_t gather_channel_quirk_mask;
+
+ /**
+ * Whether this sampler uses the compressed multisample surface layout.
+ */
+ uint32_t compressed_multisample_layout_mask;
+
+ /**
+ * For Sandybridge, which shader w/a we need for gather quirks.
+ */
+ enum gen6_gather_sampler_wa gen6_gather_wa[MAX_SAMPLERS];
+};
+
+
+/** The program key for Vertex Shaders. */
+struct brw_vs_prog_key {
+ unsigned program_string_id;
+
+ /*
+ * Per-attribute workaround flags
+ */
+ uint8_t gl_attrib_wa_flags[VERT_ATTRIB_MAX];
+
+ bool copy_edgeflag:1;
+
+ bool clamp_vertex_color:1;
+
+ /**
+ * How many user clipping planes are being uploaded to the vertex shader as
+ * push constants.
+ *
+ * These are used for lowering legacy gl_ClipVertex/gl_Position clipping to
+ * clip distances.
+ */
+ unsigned nr_userclip_plane_consts:4;
+
+ /**
+ * For pre-Gen6 hardware, a bitfield indicating which texture coordinates
+ * are going to be replaced with point coordinates (as a consequence of a
+ * call to glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)). Because
+ * our SF thread requires exact matching between VS outputs and FS inputs,
+ * these texture coordinates will need to be unconditionally included in
+ * the VUE, even if they aren't written by the vertex shader.
+ */
+ uint8_t point_coord_replace;
+
+ struct brw_sampler_prog_key_data tex;
+};
+
+/** The program key for Geometry Shaders. */
+struct brw_gs_prog_key
+{
+ unsigned program_string_id;
+
+ struct brw_sampler_prog_key_data tex;
+};
+
+/** The program key for Fragment/Pixel Shaders. */
+struct brw_wm_prog_key {
+ uint8_t iz_lookup;
+ bool stats_wm:1;
+ bool flat_shade:1;
+ bool persample_shading:1;
+ bool persample_2x:1;
+ unsigned nr_color_regions:5;
+ bool replicate_alpha:1;
+ bool render_to_fbo:1;
+ bool clamp_fragment_color:1;
+ bool compute_pos_offset:1;
+ bool compute_sample_id:1;
+ unsigned line_aa:2;
+ bool high_quality_derivatives:1;
+
+ uint16_t drawable_height;
+ uint64_t input_slots_valid;
+ unsigned program_string_id;
+ GLenum alpha_test_func; /* < For Gen4/5 MRT alpha test */
+ float alpha_test_ref;
+
+ struct brw_sampler_prog_key_data tex;
+};
+
+struct brw_cs_prog_key {
+ uint32_t program_string_id;
+ struct brw_sampler_prog_key_data tex;
+};
+
+/*
+ * Image metadata structure as laid out in the shader parameter
+ * buffer. Entries have to be 16B-aligned for the vec4 back-end to be
+ * able to use them. That's okay because the padding and any unused
+ * entries [most of them except when we're doing untyped surface
+ * access] will be removed by the uniform packing pass.
+ */
+#define BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET 0
+#define BRW_IMAGE_PARAM_OFFSET_OFFSET 4
+#define BRW_IMAGE_PARAM_SIZE_OFFSET 8
+#define BRW_IMAGE_PARAM_STRIDE_OFFSET 12
+#define BRW_IMAGE_PARAM_TILING_OFFSET 16
+#define BRW_IMAGE_PARAM_SWIZZLING_OFFSET 20
+#define BRW_IMAGE_PARAM_SIZE 24
+
+struct brw_image_param {
+ /** Surface binding table index. */
+ uint32_t surface_idx;
+
+ /** Offset applied to the X and Y surface coordinates. */
+ uint32_t offset[2];
+
+ /** Surface X, Y and Z dimensions. */
+ uint32_t size[3];
+
+ /** X-stride in bytes, Y-stride in pixels, horizontal slice stride in
+ * pixels, vertical slice stride in pixels.
+ */
+ uint32_t stride[4];
+
+ /** Log2 of the tiling modulus in the X, Y and Z dimension. */
+ uint32_t tiling[3];
+
+ /**
+ * Right shift to apply for bit 6 address swizzling. Two different
+ * swizzles can be specified and will be applied one after the other. The
+ * resulting address will be:
+ *
+ * addr' = addr ^ ((1 << 6) & ((addr >> swizzling[0]) ^
+ * (addr >> swizzling[1])))
+ *
+ * Use \c 0xff if any of the swizzles is not required.
+ */
+ uint32_t swizzling[2];
+};
+
+struct brw_stage_prog_data {
+ struct {
+ /** size of our binding table. */
+ uint32_t size_bytes;
+
+ /** @{
+ * surface indices for the various groups of surfaces
+ */
+ uint32_t pull_constants_start;
+ uint32_t texture_start;
+ uint32_t gather_texture_start;
+ uint32_t ubo_start;
+ uint32_t ssbo_start;
+ uint32_t abo_start;
+ uint32_t image_start;
+ uint32_t shader_time_start;
+ /** @} */
+ } binding_table;
+
+ GLuint nr_params; /**< number of float params/constants */
+ GLuint nr_pull_params;
+ unsigned nr_image_params;
+
+ unsigned curb_read_length;
+ unsigned total_scratch;
+
+ /**
+ * Register where the thread expects to find input data from the URB
+ * (typically uniforms, followed by vertex or fragment attributes).
+ */
+ unsigned dispatch_grf_start_reg;
+
+ bool use_alt_mode; /**< Use ALT floating point mode? Otherwise, IEEE. */
+
+ /* Pointers to tracked values (only valid once
+ * _mesa_load_state_parameters has been called at runtime).
+ */
+ const union gl_constant_value **param;
+ const union gl_constant_value **pull_param;
+
+ /** Image metadata passed to the shader as uniforms. */
+ struct brw_image_param *image_param;
+};
+
+/* Data about a particular attempt to compile a program. Note that
+ * there can be many of these, each in a different GL state
+ * corresponding to a different brw_wm_prog_key struct, with different
+ * compiled programs.
+ */
+struct brw_wm_prog_data {
+ struct brw_stage_prog_data base;
+
+ GLuint num_varying_inputs;
+
+ GLuint dispatch_grf_start_reg_16;
+ GLuint reg_blocks;
+ GLuint reg_blocks_16;
+
+ struct {
+ /** @{
+ * surface indices the WM-specific surfaces
+ */
+ uint32_t render_target_start;
+ /** @} */
+ } binding_table;
+
+ uint8_t computed_depth_mode;
+
+ bool early_fragment_tests;
+ bool no_8;
+ bool dual_src_blend;
+ bool uses_pos_offset;
+ bool uses_omask;
+ bool uses_kill;
+ bool pulls_bary;
+ uint32_t prog_offset_16;
+
+ /**
+ * Mask of which interpolation modes are required by the fragment shader.
+ * Used in hardware setup on gen6+.
+ */
+ uint32_t barycentric_interp_modes;
+
+ /**
+ * Map from gl_varying_slot to the position within the FS setup data
+ * payload where the varying's attribute vertex deltas should be delivered.
+ * For varying slots that are not used by the FS, the value is -1.
+ */
+ int urb_setup[VARYING_SLOT_MAX];
+};
+
+struct brw_cs_prog_data {
+ struct brw_stage_prog_data base;
+
+ GLuint dispatch_grf_start_reg_16;
+ unsigned local_size[3];
+ unsigned simd_size;
+ bool uses_barrier;
+ bool uses_num_work_groups;
+ unsigned local_invocation_id_regs;
+
+ struct {
+ /** @{
+ * surface indices the CS-specific surfaces
+ */
+ uint32_t work_groups_start;
+ /** @} */
+ } binding_table;
+};
+
+/**
+ * Enum representing the i965-specific vertex results that don't correspond
+ * exactly to any element of gl_varying_slot. The values of this enum are
+ * assigned such that they don't conflict with gl_varying_slot.
+ */
+typedef enum
+{
+ BRW_VARYING_SLOT_NDC = VARYING_SLOT_MAX,
+ BRW_VARYING_SLOT_PAD,
+ /**
+ * Technically this is not a varying but just a placeholder that
+ * compile_sf_prog() inserts into its VUE map to cause the gl_PointCoord
+ * builtin variable to be compiled correctly. see compile_sf_prog() for
+ * more info.
+ */
+ BRW_VARYING_SLOT_PNTC,
+ BRW_VARYING_SLOT_COUNT
+} brw_varying_slot;
+
+/**
+ * Data structure recording the relationship between the gl_varying_slot enum
+ * and "slots" within the vertex URB entry (VUE). A "slot" is defined as a
+ * single octaword within the VUE (128 bits).
+ *
+ * Note that each BRW register contains 256 bits (2 octawords), so when
+ * accessing the VUE in URB_NOSWIZZLE mode, each register corresponds to two
+ * consecutive VUE slots. When accessing the VUE in URB_INTERLEAVED mode (as
+ * in a vertex shader), each register corresponds to a single VUE slot, since
+ * it contains data for two separate vertices.
+ */
+struct brw_vue_map {
+ /**
+ * Bitfield representing all varying slots that are (a) stored in this VUE
+ * map, and (b) actually written by the shader. Does not include any of
+ * the additional varying slots defined in brw_varying_slot.
+ */
+ GLbitfield64 slots_valid;
+
+ /**
+ * Is this VUE map for a separate shader pipeline?
+ *
+ * Separable programs (GL_ARB_separate_shader_objects) can be mixed and matched
+ * without the linker having a chance to dead code eliminate unused varyings.
+ *
+ * This means that we have to use a fixed slot layout, based on the output's
+ * location field, rather than assigning slots in a compact contiguous block.
+ */
+ bool separate;
+
+ /**
+ * Map from gl_varying_slot value to VUE slot. For gl_varying_slots that are
+ * not stored in a slot (because they are not written, or because
+ * additional processing is applied before storing them in the VUE), the
+ * value is -1.
+ */
+ signed char varying_to_slot[BRW_VARYING_SLOT_COUNT];
+
+ /**
+ * Map from VUE slot to gl_varying_slot value. For slots that do not
+ * directly correspond to a gl_varying_slot, the value comes from
+ * brw_varying_slot.
+ *
+ * For slots that are not in use, the value is BRW_VARYING_SLOT_COUNT (this
+ * simplifies code that uses the value stored in slot_to_varying to
+ * create a bit mask).
+ */
+ signed char slot_to_varying[BRW_VARYING_SLOT_COUNT];
+
+ /**
+ * Total number of VUE slots in use
+ */
+ int num_slots;
+};
+
+/**
+ * Convert a VUE slot number into a byte offset within the VUE.
+ */
+static inline GLuint brw_vue_slot_to_offset(GLuint slot)
+{
+ return 16*slot;
+}
+
+/**
+ * Convert a vertex output (brw_varying_slot) into a byte offset within the
+ * VUE.
+ */
+static inline GLuint brw_varying_to_offset(struct brw_vue_map *vue_map,
+ GLuint varying)
+{
+ return brw_vue_slot_to_offset(vue_map->varying_to_slot[varying]);
+}
+
+void brw_compute_vue_map(const struct brw_device_info *devinfo,
+ struct brw_vue_map *vue_map,
+ GLbitfield64 slots_valid,
+ bool separate_shader);
+
+enum shader_dispatch_mode {
+ DISPATCH_MODE_4X1_SINGLE = 0,
+ DISPATCH_MODE_4X2_DUAL_INSTANCE = 1,
+ DISPATCH_MODE_4X2_DUAL_OBJECT = 2,
+ DISPATCH_MODE_SIMD8 = 3,
+};
+
+struct brw_vue_prog_data {
+ struct brw_stage_prog_data base;
+ struct brw_vue_map vue_map;
+
+ GLuint urb_read_length;
+ GLuint total_grf;
+
+ /* Used for calculating urb partitions. In the VS, this is the size of the
+ * URB entry used for both input and output to the thread. In the GS, this
+ * is the size of the URB entry used for output.
+ */
+ GLuint urb_entry_size;
+
+ enum shader_dispatch_mode dispatch_mode;
+};
+
+struct brw_vs_prog_data {
+ struct brw_vue_prog_data base;
+
+ GLbitfield64 inputs_read;
+
+ unsigned nr_attributes;
+
+ bool uses_vertexid;
+ bool uses_instanceid;
+};
+
+struct brw_gs_prog_data
+{
+ struct brw_vue_prog_data base;
+
+ /**
+ * Size of an output vertex, measured in HWORDS (32 bytes).
+ */
+ unsigned output_vertex_size_hwords;
+
+ unsigned output_topology;
+
+ /**
+ * Size of the control data (cut bits or StreamID bits), in hwords (32
+ * bytes). 0 if there is no control data.
+ */
+ unsigned control_data_header_size_hwords;
+
+ /**
+ * Format of the control data (either GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID
+ * if the control data is StreamID bits, or
+ * GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut bits).
+ * Ignored if control_data_header_size is 0.
+ */
+ unsigned control_data_format;
+
+ bool include_primitive_id;
+
+ /**
+ * The number of vertices emitted, if constant - otherwise -1.
+ */
+ int static_vertex_count;
+
+ int invocations;
+
+ /**
+ * Gen6 transform feedback enabled flag.
+ */
+ bool gen6_xfb_enabled;
+
+ /**
+ * Gen6: Provoking vertex convention for odd-numbered triangles
+ * in tristrips.
+ */
+ GLuint pv_first:1;
+
+ /**
+ * Gen6: Number of varyings that are output to transform feedback.
+ */
+ GLuint num_transform_feedback_bindings:7; /* 0-BRW_MAX_SOL_BINDINGS */
+
+ /**
+ * Gen6: Map from the index of a transform feedback binding table entry to the
+ * gl_varying_slot that should be streamed out through that binding table
+ * entry.
+ */
+ unsigned char transform_feedback_bindings[64 /* BRW_MAX_SOL_BINDINGS */];
+
+ /**
+ * Gen6: Map from the index of a transform feedback binding table entry to the
+ * swizzles that should be used when streaming out data through that
+ * binding table entry.
+ */
+ unsigned char transform_feedback_swizzles[64 /* BRW_MAX_SOL_BINDINGS */];
+};
+
+
+/** @} */
+
+/**
+ * Compile a vertex shader.
+ *
+ * Returns the final assembly and the program's size.
+ */
+const unsigned *
+brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
+ void *mem_ctx,
+ const struct brw_vs_prog_key *key,
+ struct brw_vs_prog_data *prog_data,
+ const struct nir_shader *shader,
+ gl_clip_plane *clip_planes,
+ bool use_legacy_snorm_formula,
+ int shader_time_index,
+ unsigned *final_assembly_size,
+ char **error_str);
+
+/**
+ * Scratch data used when compiling a GLSL geometry shader.
+ */
+struct brw_gs_compile
+{
+ struct brw_gs_prog_key key;
+ struct brw_gs_prog_data prog_data;
+ struct brw_vue_map input_vue_map;
+
+ struct brw_geometry_program *gp;
+
+ unsigned control_data_bits_per_vertex;
+ unsigned control_data_header_size_bits;
+};
+
+/**
+ * Compile a geometry shader.
+ *
+ * Returns the final assembly and the program's size.
+ */
+const unsigned *
+brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
+ struct brw_gs_compile *c,
+ const struct nir_shader *shader,
+ struct gl_shader_program *shader_prog,
+ void *mem_ctx,
+ int shader_time_index,
+ unsigned *final_assembly_size,
+ char **error_str);
+
+/**
+ * Compile a fragment shader.
+ *
+ * Returns the final assembly and the program's size.
+ */
+const unsigned *
+brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
+ void *mem_ctx,
+ const struct brw_wm_prog_key *key,
+ struct brw_wm_prog_data *prog_data,
+ const struct nir_shader *shader,
+ struct gl_program *prog,
+ int shader_time_index8,
+ int shader_time_index16,
+ bool use_rep_send,
+ unsigned *final_assembly_size,
+ char **error_str);
+
+/**
+ * Compile a compute shader.
+ *
+ * Returns the final assembly and the program's size.
+ */
+const unsigned *
+brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
+ void *mem_ctx,
+ const struct brw_cs_prog_key *key,
+ struct brw_cs_prog_data *prog_data,
+ const struct nir_shader *shader,
+ int shader_time_index,
+ unsigned *final_assembly_size,
+ char **error_str);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 6b2bbd21703..3b125448e14 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -51,7 +51,7 @@
#include "brw_context.h"
#include "brw_defines.h"
-#include "brw_shader.h"
+#include "brw_compiler.h"
#include "brw_draw.h"
#include "brw_state.h"
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index aa1284db3ce..4f503ae4869 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -40,6 +40,7 @@
#include "main/mm.h"
#include "main/mtypes.h"
#include "brw_structs.h"
+#include "brw_compiler.h"
#include "intel_aub.h"
#include "program/prog_parameter.h"
@@ -340,260 +341,6 @@ struct brw_shader {
bool compiled_once;
};
-struct brw_stage_prog_data {
- struct {
- /** size of our binding table. */
- uint32_t size_bytes;
-
- /** @{
- * surface indices for the various groups of surfaces
- */
- uint32_t pull_constants_start;
- uint32_t texture_start;
- uint32_t gather_texture_start;
- uint32_t ubo_start;
- uint32_t abo_start;
- uint32_t image_start;
- uint32_t shader_time_start;
- /** @} */
- } binding_table;
-
- GLuint nr_params; /**< number of float params/constants */
- GLuint nr_pull_params;
- unsigned nr_image_params;
-
- unsigned curb_read_length;
- unsigned total_scratch;
-
- /**
- * Register where the thread expects to find input data from the URB
- * (typically uniforms, followed by vertex or fragment attributes).
- */
- unsigned dispatch_grf_start_reg;
-
- bool use_alt_mode; /**< Use ALT floating point mode? Otherwise, IEEE. */
-
- /* Pointers to tracked values (only valid once
- * _mesa_load_state_parameters has been called at runtime).
- */
- const gl_constant_value **param;
- const gl_constant_value **pull_param;
-
- /** Image metadata passed to the shader as uniforms. */
- struct brw_image_param *image_param;
-};
-
-/*
- * Image metadata structure as laid out in the shader parameter
- * buffer. Entries have to be 16B-aligned for the vec4 back-end to be
- * able to use them. That's okay because the padding and any unused
- * entries [most of them except when we're doing untyped surface
- * access] will be removed by the uniform packing pass.
- */
-#define BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET 0
-#define BRW_IMAGE_PARAM_OFFSET_OFFSET 4
-#define BRW_IMAGE_PARAM_SIZE_OFFSET 8
-#define BRW_IMAGE_PARAM_STRIDE_OFFSET 12
-#define BRW_IMAGE_PARAM_TILING_OFFSET 16
-#define BRW_IMAGE_PARAM_SWIZZLING_OFFSET 20
-#define BRW_IMAGE_PARAM_SIZE 24
-
-struct brw_image_param {
- /** Surface binding table index. */
- uint32_t surface_idx;
-
- /** Offset applied to the X and Y surface coordinates. */
- uint32_t offset[2];
-
- /** Surface X, Y and Z dimensions. */
- uint32_t size[3];
-
- /** X-stride in bytes, Y-stride in pixels, horizontal slice stride in
- * pixels, vertical slice stride in pixels.
- */
- uint32_t stride[4];
-
- /** Log2 of the tiling modulus in the X, Y and Z dimension. */
- uint32_t tiling[3];
-
- /**
- * Right shift to apply for bit 6 address swizzling. Two different
- * swizzles can be specified and will be applied one after the other. The
- * resulting address will be:
- *
- * addr' = addr ^ ((1 << 6) & ((addr >> swizzling[0]) ^
- * (addr >> swizzling[1])))
- *
- * Use \c 0xff if any of the swizzles is not required.
- */
- uint32_t swizzling[2];
-};
-
-/* Data about a particular attempt to compile a program. Note that
- * there can be many of these, each in a different GL state
- * corresponding to a different brw_wm_prog_key struct, with different
- * compiled programs.
- */
-struct brw_wm_prog_data {
- struct brw_stage_prog_data base;
-
- GLuint num_varying_inputs;
-
- GLuint dispatch_grf_start_reg_16;
- GLuint reg_blocks;
- GLuint reg_blocks_16;
-
- struct {
- /** @{
- * surface indices the WM-specific surfaces
- */
- uint32_t render_target_start;
- /** @} */
- } binding_table;
-
- uint8_t computed_depth_mode;
-
- bool early_fragment_tests;
- bool no_8;
- bool dual_src_blend;
- bool uses_pos_offset;
- bool uses_omask;
- bool uses_kill;
- bool pulls_bary;
- uint32_t prog_offset_16;
-
- /**
- * Mask of which interpolation modes are required by the fragment shader.
- * Used in hardware setup on gen6+.
- */
- uint32_t barycentric_interp_modes;
-
- /**
- * Map from gl_varying_slot to the position within the FS setup data
- * payload where the varying's attribute vertex deltas should be delivered.
- * For varying slots that are not used by the FS, the value is -1.
- */
- int urb_setup[VARYING_SLOT_MAX];
-};
-
-struct brw_cs_prog_data {
- struct brw_stage_prog_data base;
-
- GLuint dispatch_grf_start_reg_16;
- unsigned local_size[3];
- unsigned simd_size;
- bool uses_barrier;
- bool uses_num_work_groups;
- unsigned local_invocation_id_regs;
-
- struct {
- /** @{
- * surface indices the CS-specific surfaces
- */
- uint32_t work_groups_start;
- /** @} */
- } binding_table;
-};
-
-/**
- * Enum representing the i965-specific vertex results that don't correspond
- * exactly to any element of gl_varying_slot. The values of this enum are
- * assigned such that they don't conflict with gl_varying_slot.
- */
-typedef enum
-{
- BRW_VARYING_SLOT_NDC = VARYING_SLOT_MAX,
- BRW_VARYING_SLOT_PAD,
- /**
- * Technically this is not a varying but just a placeholder that
- * compile_sf_prog() inserts into its VUE map to cause the gl_PointCoord
- * builtin variable to be compiled correctly. see compile_sf_prog() for
- * more info.
- */
- BRW_VARYING_SLOT_PNTC,
- BRW_VARYING_SLOT_COUNT
-} brw_varying_slot;
-
-
-/**
- * Data structure recording the relationship between the gl_varying_slot enum
- * and "slots" within the vertex URB entry (VUE). A "slot" is defined as a
- * single octaword within the VUE (128 bits).
- *
- * Note that each BRW register contains 256 bits (2 octawords), so when
- * accessing the VUE in URB_NOSWIZZLE mode, each register corresponds to two
- * consecutive VUE slots. When accessing the VUE in URB_INTERLEAVED mode (as
- * in a vertex shader), each register corresponds to a single VUE slot, since
- * it contains data for two separate vertices.
- */
-struct brw_vue_map {
- /**
- * Bitfield representing all varying slots that are (a) stored in this VUE
- * map, and (b) actually written by the shader. Does not include any of
- * the additional varying slots defined in brw_varying_slot.
- */
- GLbitfield64 slots_valid;
-
- /**
- * Is this VUE map for a separate shader pipeline?
- *
- * Separable programs (GL_ARB_separate_shader_objects) can be mixed and matched
- * without the linker having a chance to dead code eliminate unused varyings.
- *
- * This means that we have to use a fixed slot layout, based on the output's
- * location field, rather than assigning slots in a compact contiguous block.
- */
- bool separate;
-
- /**
- * Map from gl_varying_slot value to VUE slot. For gl_varying_slots that are
- * not stored in a slot (because they are not written, or because
- * additional processing is applied before storing them in the VUE), the
- * value is -1.
- */
- signed char varying_to_slot[BRW_VARYING_SLOT_COUNT];
-
- /**
- * Map from VUE slot to gl_varying_slot value. For slots that do not
- * directly correspond to a gl_varying_slot, the value comes from
- * brw_varying_slot.
- *
- * For slots that are not in use, the value is BRW_VARYING_SLOT_COUNT (this
- * simplifies code that uses the value stored in slot_to_varying to
- * create a bit mask).
- */
- signed char slot_to_varying[BRW_VARYING_SLOT_COUNT];
-
- /**
- * Total number of VUE slots in use
- */
- int num_slots;
-};
-
-/**
- * Convert a VUE slot number into a byte offset within the VUE.
- */
-static inline GLuint brw_vue_slot_to_offset(GLuint slot)
-{
- return 16*slot;
-}
-
-/**
- * Convert a vertex output (brw_varying_slot) into a byte offset within the
- * VUE.
- */
-static inline GLuint brw_varying_to_offset(struct brw_vue_map *vue_map,
- GLuint varying)
-{
- return brw_vue_slot_to_offset(vue_map->varying_to_slot[varying]);
-}
-
-void brw_compute_vue_map(const struct brw_device_info *devinfo,
- struct brw_vue_map *vue_map,
- GLbitfield64 slots_valid,
- bool separate_shader);
-
-
/**
* Bitmask indicating which fragment shader inputs represent varyings (and
* hence have to be delivered to the fragment shader by the SF/SBE stage).
@@ -670,39 +417,6 @@ struct brw_ff_gs_prog_data {
unsigned svbi_postincrement_value;
};
-enum shader_dispatch_mode {
- DISPATCH_MODE_4X1_SINGLE = 0,
- DISPATCH_MODE_4X2_DUAL_INSTANCE = 1,
- DISPATCH_MODE_4X2_DUAL_OBJECT = 2,
- DISPATCH_MODE_SIMD8 = 3,
-};
-
-struct brw_vue_prog_data {
- struct brw_stage_prog_data base;
- struct brw_vue_map vue_map;
-
- GLuint urb_read_length;
- GLuint total_grf;
-
- /* Used for calculating urb partitions. In the VS, this is the size of the
- * URB entry used for both input and output to the thread. In the GS, this
- * is the size of the URB entry used for output.
- */
- GLuint urb_entry_size;
-
- enum shader_dispatch_mode dispatch_mode;
-};
-
-
-struct brw_vs_prog_data {
- struct brw_vue_prog_data base;
-
- GLbitfield64 inputs_read;
-
- bool uses_vertexid;
- bool uses_instanceid;
-};
-
/** Number of texture sampler units */
#define BRW_MAX_TEX_UNIT 32
@@ -715,9 +429,6 @@ struct brw_vs_prog_data {
/** Max number of SSBOs in a shader */
#define BRW_MAX_SSBO 12
-/** Max number of combined UBOs and SSBOs in a shader */
-#define BRW_MAX_COMBINED_UBO_SSBO (BRW_MAX_UBO + BRW_MAX_SSBO)
-
/** Max number of atomic counter buffer objects in a shader */
#define BRW_MAX_ABO 16
@@ -763,71 +474,6 @@ struct brw_vs_prog_data {
#define SURF_INDEX_GEN6_SOL_BINDING(t) (t)
-struct brw_gs_prog_data
-{
- struct brw_vue_prog_data base;
-
- /**
- * Size of an output vertex, measured in HWORDS (32 bytes).
- */
- unsigned output_vertex_size_hwords;
-
- unsigned output_topology;
-
- /**
- * Size of the control data (cut bits or StreamID bits), in hwords (32
- * bytes). 0 if there is no control data.
- */
- unsigned control_data_header_size_hwords;
-
- /**
- * Format of the control data (either GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID
- * if the control data is StreamID bits, or
- * GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut bits).
- * Ignored if control_data_header_size is 0.
- */
- unsigned control_data_format;
-
- bool include_primitive_id;
-
- /**
- * The number of vertices emitted, if constant - otherwise -1.
- */
- int static_vertex_count;
-
- int invocations;
-
- /**
- * Gen6 transform feedback enabled flag.
- */
- bool gen6_xfb_enabled;
-
- /**
- * Gen6: Provoking vertex convention for odd-numbered triangles
- * in tristrips.
- */
- GLuint pv_first:1;
-
- /**
- * Gen6: Number of varyings that are output to transform feedback.
- */
- GLuint num_transform_feedback_bindings:7; /* 0-BRW_MAX_SOL_BINDINGS */
-
- /**
- * Gen6: Map from the index of a transform feedback binding table entry to the
- * gl_varying_slot that should be streamed out through that binding table
- * entry.
- */
- unsigned char transform_feedback_bindings[BRW_MAX_SOL_BINDINGS];
-
- /**
- * Gen6: Map from the index of a transform feedback binding table entry to the
- * swizzles that should be used when streaming out data through that
- * binding table entry.
- */
- unsigned char transform_feedback_swizzles[BRW_MAX_SOL_BINDINGS];
-};
-
/**
* Stride in bytes between shader_time entries.
*
@@ -953,6 +599,8 @@ struct intel_batchbuffer {
} saved;
};
+#define MAX_GS_INPUT_VERTICES 6
+
#define BRW_MAX_XFB_STREAMS 4
struct brw_transform_feedback_object {
diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c
index 45fb816c160..263d224e882 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.c
+++ b/src/mesa/drivers/dri/i965/brw_cs.c
@@ -105,9 +105,15 @@ brw_codegen_cs_prog(struct brw_context *brw,
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
st_index = brw_get_shader_time_index(brw, prog, &cp->program.Base, ST_CS);
- program = brw_cs_emit(brw, mem_ctx, key, &prog_data,
- &cp->program, prog, st_index, &program_size);
+ char *error_str;
+ program = brw_compile_cs(brw->intelScreen->compiler, brw, mem_ctx,
+ key, &prog_data, cp->program.Base.nir,
+ st_index, &program_size, &error_str);
if (program == NULL) {
+ prog->LinkStatus = false;
+ ralloc_strcat(&prog->InfoLog, error_str);
+ _mesa_problem(NULL, "Failed to compile compute shader: %s\n", error_str);
+
ralloc_free(mem_ctx);
return false;
}
diff --git a/src/mesa/drivers/dri/i965/brw_cs.h b/src/mesa/drivers/dri/i965/brw_cs.h
index 17c2ff9871a..899e340f14e 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.h
+++ b/src/mesa/drivers/dri/i965/brw_cs.h
@@ -27,11 +27,6 @@
#include "brw_program.h"
-struct brw_cs_prog_key {
- uint32_t program_string_id;
- struct brw_sampler_prog_key_data tex;
-};
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -39,16 +34,6 @@ extern "C" {
void
brw_upload_cs_prog(struct brw_context *brw);
-const unsigned *
-brw_cs_emit(struct brw_context *brw,
- void *mem_ctx,
- const struct brw_cs_prog_key *key,
- struct brw_cs_prog_data *prog_data,
- struct gl_compute_program *cp,
- struct gl_shader_program *prog,
- int shader_time_index,
- unsigned *final_assembly_size);
-
void
brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data,
void *buffer, uint32_t threads, uint32_t stride);
diff --git a/src/mesa/drivers/dri/i965/brw_cubemap_normalize.cpp b/src/mesa/drivers/dri/i965/brw_cubemap_normalize.cpp
index 33571292007..33d2048e657 100644
--- a/src/mesa/drivers/dri/i965/brw_cubemap_normalize.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cubemap_normalize.cpp
@@ -30,7 +30,7 @@
* \author Eric Anholt <[email protected]>
*/
-#include "glsl/glsl_types.h"
+#include "glsl/nir/glsl_types.h"
#include "glsl/ir.h"
#include "program/prog_instruction.h" /* For WRITEMASK_* */
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 761aa0ec5fa..0ac1ad9378b 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -461,7 +461,7 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
struct brw_reg mrf,
bool noperspective,
unsigned mode,
- unsigned data,
+ struct brw_reg data,
unsigned msg_length,
unsigned response_length);
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index dc699bb6321..bf2fee9ed48 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -3212,26 +3212,29 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
struct brw_reg mrf,
bool noperspective,
unsigned mode,
- unsigned data,
+ struct brw_reg data,
unsigned msg_length,
unsigned response_length)
{
const struct brw_device_info *devinfo = p->devinfo;
- struct brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
-
- brw_set_dest(p, insn, dest);
- brw_set_src0(p, insn, mrf);
- brw_set_message_descriptor(p, insn, GEN7_SFID_PIXEL_INTERPOLATOR,
- msg_length, response_length,
- false /* header is never present for PI */,
- false);
+ struct brw_inst *insn;
+ const uint16_t exec_size = brw_inst_exec_size(devinfo, p->current);
- brw_inst_set_pi_simd_mode(
- devinfo, insn, brw_inst_exec_size(devinfo, insn) == BRW_EXECUTE_16);
+ /* brw_send_indirect_message will automatically use a direct send message
+ * if data is actually immediate.
+ */
+ insn = brw_send_indirect_message(p,
+ GEN7_SFID_PIXEL_INTERPOLATOR,
+ dest,
+ mrf,
+ vec1(data));
+ brw_inst_set_mlen(devinfo, insn, msg_length);
+ brw_inst_set_rlen(devinfo, insn, response_length);
+
+ brw_inst_set_pi_simd_mode(devinfo, insn, exec_size == BRW_EXECUTE_16);
brw_inst_set_pi_slot_group(devinfo, insn, 0); /* zero unless 32/64px dispatch */
brw_inst_set_pi_nopersp(devinfo, insn, noperspective);
brw_inst_set_pi_message_type(devinfo, insn, mode);
- brw_inst_set_pi_message_data(devinfo, insn, data);
}
void
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 5049851c617..0562c5a9981 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -47,7 +47,7 @@
#include "brw_dead_control_flow.h"
#include "main/uniforms.h"
#include "brw_fs_live_variables.h"
-#include "glsl/glsl_types.h"
+#include "glsl/nir/glsl_types.h"
#include "program/sampler.h"
using namespace brw;
@@ -338,6 +338,18 @@ fs_inst::can_do_source_mods(const struct brw_device_info *devinfo)
}
bool
+fs_inst::can_change_types() const
+{
+ return dst.type == src[0].type &&
+ !src[0].abs && !src[0].negate && !saturate &&
+ (opcode == BRW_OPCODE_MOV ||
+ (opcode == BRW_OPCODE_SEL &&
+ dst.type == src[1].type &&
+ predicate != BRW_PREDICATE_NONE &&
+ !src[1].abs && !src[1].negate));
+}
+
+bool
fs_inst::has_side_effects() const
{
return this->eot || backend_instruction::has_side_effects();
@@ -1049,11 +1061,11 @@ fs_visitor::emit_general_interpolation(fs_reg attr, const char *name,
unsigned int array_elements;
if (type->is_array()) {
- array_elements = type->length;
+ array_elements = type->arrays_of_arrays_size();
if (array_elements == 0) {
fail("dereferenced array '%s' has length 0\n", name);
}
- type = type->fields.array;
+ type = type->without_array();
} else {
array_elements = 1;
}
@@ -1509,25 +1521,14 @@ void
fs_visitor::assign_vs_urb_setup()
{
brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data;
- int grf, count, slot, channel, attr;
assert(stage == MESA_SHADER_VERTEX);
- count = _mesa_bitcount_64(vs_prog_data->inputs_read);
+ int count = _mesa_bitcount_64(vs_prog_data->inputs_read);
if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid)
count++;
/* Each attribute is 4 regs. */
- this->first_non_payload_grf += count * 4;
-
- unsigned vue_entries =
- MAX2(count, vs_prog_data->base.vue_map.num_slots);
-
- /* URB entry size is counted in units of 64 bytes (for the 3DSTATE_URB_VS
- * command). Each attribute is 16 bytes (4 floats/dwords), so each unit
- * fits four attributes.
- */
- vs_prog_data->base.urb_entry_size = ALIGN(vue_entries, 4) / 4;
- vs_prog_data->base.urb_read_length = (count + 1) / 2;
+ this->first_non_payload_grf += 4 * vs_prog_data->nr_attributes;
assert(vs_prog_data->base.urb_read_length <= 15);
@@ -1535,25 +1536,10 @@ fs_visitor::assign_vs_urb_setup()
foreach_block_and_inst(block, fs_inst, inst, cfg) {
for (int i = 0; i < inst->sources; i++) {
if (inst->src[i].file == ATTR) {
-
- if (inst->src[i].reg == VERT_ATTRIB_MAX) {
- slot = count - 1;
- } else {
- /* Attributes come in in a contiguous block, ordered by their
- * gl_vert_attrib value. That means we can compute the slot
- * number for an attribute by masking out the enabled
- * attributes before it and counting the bits.
- */
- attr = inst->src[i].reg + inst->src[i].reg_offset / 4;
- slot = _mesa_bitcount_64(vs_prog_data->inputs_read &
- BITFIELD64_MASK(attr));
- }
-
- channel = inst->src[i].reg_offset & 3;
-
- grf = payload.num_regs +
- prog_data->curb_read_length +
- slot * 4 + channel;
+ int grf = payload.num_regs +
+ prog_data->curb_read_length +
+ inst->src[i].reg +
+ inst->src[i].reg_offset;
inst->src[i].file = HW_REG;
inst->src[i].fixed_hw_reg =
@@ -5134,41 +5120,140 @@ fs_visitor::run_cs()
return !failed;
}
+/**
+ * Return a bitfield where bit n is set if barycentric interpolation mode n
+ * (see enum brw_wm_barycentric_interp_mode) is needed by the fragment shader.
+ */
+static unsigned
+brw_compute_barycentric_interp_modes(const struct brw_device_info *devinfo,
+ bool shade_model_flat,
+ bool persample_shading,
+ const nir_shader *shader)
+{
+ unsigned barycentric_interp_modes = 0;
+
+ nir_foreach_variable(var, &shader->inputs) {
+ enum glsl_interp_qualifier interp_qualifier =
+ (enum glsl_interp_qualifier)var->data.interpolation;
+ bool is_centroid = var->data.centroid && !persample_shading;
+ bool is_sample = var->data.sample || persample_shading;
+ bool is_gl_Color = (var->data.location == VARYING_SLOT_COL0) ||
+ (var->data.location == VARYING_SLOT_COL1);
+
+ /* Ignore WPOS and FACE, because they don't require interpolation. */
+ if (var->data.location == VARYING_SLOT_POS ||
+ var->data.location == VARYING_SLOT_FACE)
+ continue;
+
+ /* Determine the set (or sets) of barycentric coordinates needed to
+ * interpolate this variable. Note that when
+ * brw->needs_unlit_centroid_workaround is set, centroid interpolation
+ * uses PIXEL interpolation for unlit pixels and CENTROID interpolation
+ * for lit pixels, so we need both sets of barycentric coordinates.
+ */
+ if (interp_qualifier == INTERP_QUALIFIER_NOPERSPECTIVE) {
+ if (is_centroid) {
+ barycentric_interp_modes |=
+ 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC;
+ } else if (is_sample) {
+ barycentric_interp_modes |=
+ 1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC;
+ }
+ if ((!is_centroid && !is_sample) ||
+ devinfo->needs_unlit_centroid_workaround) {
+ barycentric_interp_modes |=
+ 1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC;
+ }
+ } else if (interp_qualifier == INTERP_QUALIFIER_SMOOTH ||
+ (!(shade_model_flat && is_gl_Color) &&
+ interp_qualifier == INTERP_QUALIFIER_NONE)) {
+ if (is_centroid) {
+ barycentric_interp_modes |=
+ 1 << BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC;
+ } else if (is_sample) {
+ barycentric_interp_modes |=
+ 1 << BRW_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC;
+ }
+ if ((!is_centroid && !is_sample) ||
+ devinfo->needs_unlit_centroid_workaround) {
+ barycentric_interp_modes |=
+ 1 << BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
+ }
+ }
+ }
+
+ return barycentric_interp_modes;
+}
+
+static uint8_t
+computed_depth_mode(const nir_shader *shader)
+{
+ if (shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
+ switch (shader->info.fs.depth_layout) {
+ case FRAG_DEPTH_LAYOUT_NONE:
+ case FRAG_DEPTH_LAYOUT_ANY:
+ return BRW_PSCDEPTH_ON;
+ case FRAG_DEPTH_LAYOUT_GREATER:
+ return BRW_PSCDEPTH_ON_GE;
+ case FRAG_DEPTH_LAYOUT_LESS:
+ return BRW_PSCDEPTH_ON_LE;
+ case FRAG_DEPTH_LAYOUT_UNCHANGED:
+ return BRW_PSCDEPTH_OFF;
+ }
+ }
+ return BRW_PSCDEPTH_OFF;
+}
+
const unsigned *
-brw_wm_fs_emit(struct brw_context *brw,
+brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
void *mem_ctx,
const struct brw_wm_prog_key *key,
struct brw_wm_prog_data *prog_data,
- struct gl_fragment_program *fp,
- struct gl_shader_program *prog,
+ const nir_shader *shader,
+ struct gl_program *prog,
int shader_time_index8, int shader_time_index16,
- unsigned *final_assembly_size)
+ bool use_rep_send,
+ unsigned *final_assembly_size,
+ char **error_str)
{
- /* Now the main event: Visit the shader IR and generate our FS IR for it.
+ /* key->alpha_test_func means simulating alpha testing via discards,
+ * so the shader definitely kills pixels.
*/
- fs_visitor v(brw->intelScreen->compiler, brw, mem_ctx, key,
- &prog_data->base, &fp->Base, fp->Base.nir, 8, shader_time_index8);
+ prog_data->uses_kill = shader->info.fs.uses_discard || key->alpha_test_func;
+ prog_data->uses_omask =
+ shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
+ prog_data->computed_depth_mode = computed_depth_mode(shader);
+
+ prog_data->early_fragment_tests = shader->info.fs.early_fragment_tests;
+
+ prog_data->barycentric_interp_modes =
+ brw_compute_barycentric_interp_modes(compiler->devinfo,
+ key->flat_shade,
+ key->persample_shading,
+ shader);
+
+ fs_visitor v(compiler, log_data, mem_ctx, key,
+ &prog_data->base, prog, shader, 8,
+ shader_time_index8);
if (!v.run_fs(false /* do_rep_send */)) {
- if (prog) {
- prog->LinkStatus = false;
- ralloc_strcat(&prog->InfoLog, v.fail_msg);
- }
-
- _mesa_problem(NULL, "Failed to compile fragment shader: %s\n",
- v.fail_msg);
+ if (error_str)
+ *error_str = ralloc_strdup(mem_ctx, v.fail_msg);
return NULL;
}
cfg_t *simd16_cfg = NULL;
- fs_visitor v2(brw->intelScreen->compiler, brw, mem_ctx, key,
- &prog_data->base, &fp->Base, fp->Base.nir, 16, shader_time_index16);
- if (likely(!(INTEL_DEBUG & DEBUG_NO16) || brw->use_rep_send)) {
+ fs_visitor v2(compiler, log_data, mem_ctx, key,
+ &prog_data->base, prog, shader, 16,
+ shader_time_index16);
+ if (likely(!(INTEL_DEBUG & DEBUG_NO16) || use_rep_send)) {
if (!v.simd16_unsupported) {
/* Try a SIMD16 compile */
v2.import_uniforms(&v);
- if (!v2.run_fs(brw->use_rep_send)) {
- perf_debug("SIMD16 shader failed to compile: %s", v2.fail_msg);
+ if (!v2.run_fs(use_rep_send)) {
+ compiler->shader_perf_log(log_data,
+ "SIMD16 shader failed to compile: %s",
+ v2.fail_msg);
} else {
simd16_cfg = v2.cfg;
}
@@ -5176,8 +5261,8 @@ brw_wm_fs_emit(struct brw_context *brw,
}
cfg_t *simd8_cfg;
- int no_simd8 = (INTEL_DEBUG & DEBUG_NO8) || brw->no_simd8;
- if ((no_simd8 || brw->gen < 5) && simd16_cfg) {
+ int no_simd8 = (INTEL_DEBUG & DEBUG_NO8) || use_rep_send;
+ if ((no_simd8 || compiler->devinfo->gen < 5) && simd16_cfg) {
simd8_cfg = NULL;
prog_data->no_8 = true;
} else {
@@ -5185,20 +5270,14 @@ brw_wm_fs_emit(struct brw_context *brw,
prog_data->no_8 = false;
}
- fs_generator g(brw->intelScreen->compiler, brw,
- mem_ctx, (void *) key, &prog_data->base,
- &fp->Base, v.promoted_constants, v.runtime_check_aads_emit, "FS");
+ fs_generator g(compiler, log_data, mem_ctx, (void *) key, &prog_data->base,
+ v.promoted_constants, v.runtime_check_aads_emit, "FS");
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
- char *name;
- if (prog)
- name = ralloc_asprintf(mem_ctx, "%s fragment shader %d",
- prog->Label ? prog->Label : "unnamed",
- prog->Name);
- else
- name = ralloc_asprintf(mem_ctx, "fragment program %d", fp->Base.Id);
-
- g.enable_debug(name);
+ g.enable_debug(ralloc_asprintf(mem_ctx, "%s fragment shader %s",
+ shader->info.label ? shader->info.label :
+ "unnamed",
+ shader->info.name));
}
if (simd8_cfg)
@@ -5283,29 +5362,32 @@ fs_visitor::emit_cs_work_group_id_setup()
}
const unsigned *
-brw_cs_emit(struct brw_context *brw,
- void *mem_ctx,
- const struct brw_cs_prog_key *key,
- struct brw_cs_prog_data *prog_data,
- struct gl_compute_program *cp,
- struct gl_shader_program *prog,
- int shader_time_index,
- unsigned *final_assembly_size)
+brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
+ void *mem_ctx,
+ const struct brw_cs_prog_key *key,
+ struct brw_cs_prog_data *prog_data,
+ const nir_shader *shader,
+ int shader_time_index,
+ unsigned *final_assembly_size,
+ char **error_str)
{
- prog_data->local_size[0] = cp->LocalSize[0];
- prog_data->local_size[1] = cp->LocalSize[1];
- prog_data->local_size[2] = cp->LocalSize[2];
+ prog_data->local_size[0] = shader->info.cs.local_size[0];
+ prog_data->local_size[1] = shader->info.cs.local_size[1];
+ prog_data->local_size[2] = shader->info.cs.local_size[2];
unsigned local_workgroup_size =
- cp->LocalSize[0] * cp->LocalSize[1] * cp->LocalSize[2];
- unsigned max_cs_threads = brw->intelScreen->compiler->devinfo->max_cs_threads;
+ shader->info.cs.local_size[0] * shader->info.cs.local_size[1] *
+ shader->info.cs.local_size[2];
+
+ unsigned max_cs_threads = compiler->devinfo->max_cs_threads;
cfg_t *cfg = NULL;
const char *fail_msg = NULL;
/* Now the main event: Visit the shader IR and generate our CS IR for it.
*/
- fs_visitor v8(brw->intelScreen->compiler, brw, mem_ctx, key,
- &prog_data->base, &cp->Base, cp->Base.nir, 8, shader_time_index);
+ fs_visitor v8(compiler, log_data, mem_ctx, key, &prog_data->base,
+ NULL, /* Never used in core profile */
+ shader, 8, shader_time_index);
if (!v8.run_cs()) {
fail_msg = v8.fail_msg;
} else if (local_workgroup_size <= 8 * max_cs_threads) {
@@ -5313,15 +5395,18 @@ brw_cs_emit(struct brw_context *brw,
prog_data->simd_size = 8;
}
- fs_visitor v16(brw->intelScreen->compiler, brw, mem_ctx, key,
- &prog_data->base, &cp->Base, cp->Base.nir, 16, shader_time_index);
+ fs_visitor v16(compiler, log_data, mem_ctx, key, &prog_data->base,
+ NULL, /* Never used in core profile */
+ shader, 16, shader_time_index);
if (likely(!(INTEL_DEBUG & DEBUG_NO16)) &&
!fail_msg && !v8.simd16_unsupported &&
local_workgroup_size <= 16 * max_cs_threads) {
/* Try a SIMD16 compile */
v16.import_uniforms(&v8);
if (!v16.run_cs()) {
- perf_debug("SIMD16 shader failed to compile: %s", v16.fail_msg);
+ compiler->shader_perf_log(log_data,
+ "SIMD16 shader failed to compile: %s",
+ v16.fail_msg);
if (!cfg) {
fail_msg =
"Couldn't generate SIMD16 program and not "
@@ -5335,20 +5420,19 @@ brw_cs_emit(struct brw_context *brw,
if (unlikely(cfg == NULL)) {
assert(fail_msg);
- prog->LinkStatus = false;
- ralloc_strcat(&prog->InfoLog, fail_msg);
- _mesa_problem(NULL, "Failed to compile compute shader: %s\n",
- fail_msg);
+ if (error_str)
+ *error_str = ralloc_strdup(mem_ctx, fail_msg);
+
return NULL;
}
- fs_generator g(brw->intelScreen->compiler, brw,
- mem_ctx, (void*) key, &prog_data->base, &cp->Base,
+ fs_generator g(compiler, log_data, mem_ctx, (void*) key, &prog_data->base,
v8.promoted_constants, v8.runtime_check_aads_emit, "CS");
if (INTEL_DEBUG & DEBUG_CS) {
- char *name = ralloc_asprintf(mem_ctx, "%s compute shader %d",
- prog->Label ? prog->Label : "unnamed",
- prog->Name);
+ char *name = ralloc_asprintf(mem_ctx, "%s compute shader %s",
+ shader->info.label ? shader->info.label :
+ "unnamed",
+ shader->info.name);
g.enable_debug(name);
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index e8b511f9ce6..171338dcc0b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -48,7 +48,7 @@ extern "C" {
#include "brw_wm.h"
#include "intel_asm_annotation.h"
}
-#include "glsl/glsl_types.h"
+#include "glsl/nir/glsl_types.h"
#include "glsl/ir.h"
#include "glsl/nir/nir.h"
#include "program/sampler.h"
@@ -96,7 +96,7 @@ public:
const void *key,
struct brw_stage_prog_data *prog_data,
struct gl_program *prog,
- nir_shader *shader,
+ const nir_shader *shader,
unsigned dispatch_width,
int shader_time_index);
@@ -400,7 +400,6 @@ public:
void *mem_ctx,
const void *key,
struct brw_stage_prog_data *prog_data,
- struct gl_program *fp,
unsigned promoted_constants,
bool runtime_check_aads_emit,
const char *stage_abbrev);
@@ -499,8 +498,6 @@ private:
const void * const key;
struct brw_stage_prog_data * const prog_data;
- const struct gl_program *prog;
-
unsigned dispatch_width; /**< 8 or 16 */
exec_list discard_halt_patches;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
index 277b6cc3a60..a13d001291c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
@@ -45,7 +45,7 @@
#include "brw_wm.h"
#include "glsl/ir.h"
#include "glsl/ir_expression_flattening.h"
-#include "glsl/glsl_types.h"
+#include "glsl/nir/glsl_types.h"
class ir_channel_expressions_visitor : public ir_hierarchical_visitor {
public:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 230b0caec47..5589716239a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -275,17 +275,6 @@ is_logic_op(enum opcode opcode)
opcode == BRW_OPCODE_NOT);
}
-static bool
-can_change_source_types(fs_inst *inst)
-{
- return !inst->src[0].abs && !inst->src[0].negate &&
- inst->dst.type == inst->src[0].type &&
- (inst->opcode == BRW_OPCODE_MOV ||
- (inst->opcode == BRW_OPCODE_SEL &&
- inst->predicate != BRW_PREDICATE_NONE &&
- !inst->src[1].abs && !inst->src[1].negate));
-}
-
bool
fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
{
@@ -368,7 +357,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
if (has_source_modifiers &&
entry->dst.type != inst->src[arg].type &&
- !can_change_source_types(inst))
+ !inst->can_change_types())
return false;
if (devinfo->gen >= 8 && (entry->src.negate || entry->src.abs) &&
@@ -438,7 +427,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
* type. If we got here, then we can just change the source and
* destination types of the instruction and keep going.
*/
- assert(can_change_source_types(inst));
+ assert(inst->can_change_types());
for (int i = 0; i < inst->sources; i++) {
inst->src[i].type = entry->dst.type;
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 6f8b75e339f..13c495cd395 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -131,7 +131,6 @@ fs_generator::fs_generator(const struct brw_compiler *compiler, void *log_data,
void *mem_ctx,
const void *key,
struct brw_stage_prog_data *prog_data,
- struct gl_program *prog,
unsigned promoted_constants,
bool runtime_check_aads_emit,
const char *stage_abbrev)
@@ -139,7 +138,7 @@ fs_generator::fs_generator(const struct brw_compiler *compiler, void *log_data,
: compiler(compiler), log_data(log_data),
devinfo(compiler->devinfo), key(key),
prog_data(prog_data),
- prog(prog), promoted_constants(promoted_constants),
+ promoted_constants(promoted_constants),
runtime_check_aads_emit(runtime_check_aads_emit), debug_flag(false),
stage_abbrev(stage_abbrev), mem_ctx(mem_ctx)
{
@@ -1377,15 +1376,14 @@ fs_generator::generate_pixel_interpolator_query(fs_inst *inst,
struct brw_reg msg_data,
unsigned msg_type)
{
- assert(msg_data.file == BRW_IMMEDIATE_VALUE &&
- msg_data.type == BRW_REGISTER_TYPE_UD);
+ assert(msg_data.type == BRW_REGISTER_TYPE_UD);
brw_pixel_interpolator_query(p,
retype(dst, BRW_REGISTER_TYPE_UW),
src,
inst->pi_noperspective,
msg_type,
- msg_data.dw1.ud,
+ msg_data,
inst->mlen,
inst->regs_written);
}
@@ -2188,7 +2186,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
100.0f * (before_size - after_size) / before_size);
dump_assembly(p->store, annotation.ann_count, annotation.ann,
- p->devinfo, prog);
+ p->devinfo);
ralloc_free(annotation.ann);
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
index 19aec92fad1..ce066a9778e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
@@ -259,16 +259,15 @@ fs_live_variables::compute_start_end()
struct block_data *bd = &block_data[block->num];
for (int i = 0; i < num_vars; i++) {
- if (BITSET_TEST(bd->livein, i)) {
- start[i] = MIN2(start[i], block->start_ip);
- end[i] = MAX2(end[i], block->start_ip);
- }
-
- if (BITSET_TEST(bd->liveout, i)) {
- start[i] = MIN2(start[i], block->end_ip);
- end[i] = MAX2(end[i], block->end_ip);
- }
+ if (BITSET_TEST(bd->livein, i)) {
+ start[i] = MIN2(start[i], block->start_ip);
+ end[i] = MAX2(end[i], block->start_ip);
+ }
+ if (BITSET_TEST(bd->liveout, i)) {
+ start[i] = MIN2(start[i], block->end_ip);
+ end[i] = MAX2(end[i], block->end_ip);
+ }
}
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 45c3f4ef3b4..feedbfbb2e3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -56,61 +56,25 @@ fs_visitor::emit_nir_code()
void
fs_visitor::nir_setup_inputs()
{
+ if (stage != MESA_SHADER_FRAGMENT)
+ return;
+
nir_inputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_inputs);
nir_foreach_variable(var, &nir->inputs) {
- enum brw_reg_type type = brw_type_for_base_type(var->type);
fs_reg input = offset(nir_inputs, bld, var->data.driver_location);
fs_reg reg;
- switch (stage) {
- case MESA_SHADER_VERTEX: {
- /* Our ATTR file is indexed by VERT_ATTRIB_*, which is the value
- * stored in nir_variable::location.
- *
- * However, NIR's load_input intrinsics use a different index - an
- * offset into a single contiguous array containing all inputs.
- * This index corresponds to the nir_variable::driver_location field.
- *
- * So, we need to copy from fs_reg(ATTR, var->location) to
- * offset(nir_inputs, var->data.driver_location).
- */
- const glsl_type *const t = var->type->without_array();
- const unsigned components = t->components();
- const unsigned cols = t->matrix_columns;
- const unsigned elts = t->vector_elements;
- unsigned array_length = var->type->is_array() ? var->type->length : 1;
- for (unsigned i = 0; i < array_length; i++) {
- for (unsigned j = 0; j < cols; j++) {
- for (unsigned k = 0; k < elts; k++) {
- bld.MOV(offset(retype(input, type), bld,
- components * i + elts * j + k),
- offset(fs_reg(ATTR, var->data.location + i, type),
- bld, 4 * j + k));
- }
- }
- }
- break;
- }
- case MESA_SHADER_GEOMETRY:
- case MESA_SHADER_COMPUTE:
- case MESA_SHADER_TESS_CTRL:
- case MESA_SHADER_TESS_EVAL:
- unreachable("fs_visitor not used for these stages yet.");
- break;
- case MESA_SHADER_FRAGMENT:
- if (var->data.location == VARYING_SLOT_POS) {
- reg = *emit_fragcoord_interpolation(var->data.pixel_center_integer,
- var->data.origin_upper_left);
- emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(),
- input, reg), 0xF);
- } else {
- emit_general_interpolation(input, var->name, var->type,
- (glsl_interp_qualifier) var->data.interpolation,
- var->data.location, var->data.centroid,
- var->data.sample);
- }
- break;
+ if (var->data.location == VARYING_SLOT_POS) {
+ reg = *emit_fragcoord_interpolation(var->data.pixel_center_integer,
+ var->data.origin_upper_left);
+ emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(),
+ input, reg), 0xF);
+ } else {
+ emit_general_interpolation(input, var->name, var->type,
+ (glsl_interp_qualifier) var->data.interpolation,
+ var->data.location, var->data.centroid,
+ var->data.sample);
}
}
}
@@ -125,9 +89,7 @@ fs_visitor::nir_setup_outputs()
nir_foreach_variable(var, &nir->outputs) {
fs_reg reg = offset(nir_outputs, bld, var->data.driver_location);
- int vector_elements =
- var->type->is_array() ? var->type->fields.array->vector_elements
- : var->type->vector_elements;
+ int vector_elements = var->type->without_array()->vector_elements;
switch (stage) {
case MESA_SHADER_VERTEX:
@@ -1180,6 +1142,36 @@ get_image_atomic_op(nir_intrinsic_op op, const glsl_type *type)
}
}
+static fs_inst *
+emit_pixel_interpolater_send(const fs_builder &bld,
+ enum opcode opcode,
+ const fs_reg &dst,
+ const fs_reg &src,
+ const fs_reg &desc,
+ glsl_interp_qualifier interpolation)
+{
+ fs_inst *inst;
+ fs_reg payload;
+ int mlen;
+
+ if (src.file == BAD_FILE) {
+ /* Dummy payload */
+ payload = bld.vgrf(BRW_REGISTER_TYPE_F, 1);
+ mlen = 1;
+ } else {
+ payload = src;
+ mlen = 2 * bld.dispatch_width() / 8;
+ }
+
+ inst = bld.emit(opcode, dst, payload, desc);
+ inst->mlen = mlen;
+ /* 2 floats per slot returned */
+ inst->regs_written = 2 * bld.dispatch_width() / 8;
+ inst->pi_noperspective = interpolation == INTERP_QUALIFIER_NOPERSPECTIVE;
+
+ return inst;
+}
+
void
fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr)
{
@@ -1440,7 +1432,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
*/
brw_mark_surface_used(prog_data,
stage_prog_data->binding_table.ubo_start +
- nir->info.num_ssbos - 1);
+ nir->info.num_ubos - 1);
}
if (has_indirect) {
@@ -1488,21 +1480,21 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
fs_reg surf_index;
if (const_uniform_block) {
- unsigned index = stage_prog_data->binding_table.ubo_start +
+ unsigned index = stage_prog_data->binding_table.ssbo_start +
const_uniform_block->u[0];
surf_index = fs_reg(index);
brw_mark_surface_used(prog_data, index);
} else {
surf_index = vgrf(glsl_type::uint_type);
bld.ADD(surf_index, get_nir_src(instr->src[0]),
- fs_reg(stage_prog_data->binding_table.ubo_start));
+ fs_reg(stage_prog_data->binding_table.ssbo_start));
surf_index = bld.emit_uniformize(surf_index);
/* Assume this may touch any UBO. It would be nice to provide
* a tighter bound, but the array information is already lowered away.
*/
brw_mark_surface_used(prog_data,
- stage_prog_data->binding_table.ubo_start +
+ stage_prog_data->binding_table.ssbo_start +
nir->info.num_ssbos - 1);
}
@@ -1545,8 +1537,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
case nir_intrinsic_load_input: {
unsigned index = 0;
for (unsigned j = 0; j < instr->num_components; j++) {
- fs_reg src = offset(retype(nir_inputs, dest.type), bld,
- instr->const_index[0] + index);
+ fs_reg src;
+ if (stage == MESA_SHADER_VERTEX) {
+ src = offset(fs_reg(ATTR, instr->const_index[0], dest.type), bld, index);
+ } else {
+ src = offset(retype(nir_inputs, dest.type), bld,
+ instr->const_index[0] + index);
+ }
if (has_indirect)
src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
index++;
@@ -1583,28 +1580,81 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
((struct brw_wm_prog_data *) prog_data)->pulls_bary = true;
fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
-
- /* For most messages, we need one reg of ignored data; the hardware
- * requires mlen==1 even when there is no payload. in the per-slot
- * offset case, we'll replace this with the proper source data.
- */
- fs_reg src = vgrf(glsl_type::float_type);
- int mlen = 1; /* one reg unless overriden */
- fs_inst *inst;
+ const glsl_interp_qualifier interpolation =
+ (glsl_interp_qualifier) instr->variables[0]->var->data.interpolation;
switch (instr->intrinsic) {
case nir_intrinsic_interp_var_at_centroid:
- inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_CENTROID,
- dst_xy, src, fs_reg(0u));
+ emit_pixel_interpolater_send(bld,
+ FS_OPCODE_INTERPOLATE_AT_CENTROID,
+ dst_xy,
+ fs_reg(), /* src */
+ fs_reg(0u),
+ interpolation);
break;
case nir_intrinsic_interp_var_at_sample: {
- /* XXX: We should probably handle non-constant sample id's */
nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]);
- assert(const_sample);
- unsigned msg_data = const_sample ? const_sample->i[0] << 4 : 0;
- inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src,
- fs_reg(msg_data));
+
+ if (const_sample) {
+ unsigned msg_data = const_sample->i[0] << 4;
+
+ emit_pixel_interpolater_send(bld,
+ FS_OPCODE_INTERPOLATE_AT_SAMPLE,
+ dst_xy,
+ fs_reg(), /* src */
+ fs_reg(msg_data),
+ interpolation);
+ } else {
+ const fs_reg sample_src = retype(get_nir_src(instr->src[0]),
+ BRW_REGISTER_TYPE_UD);
+
+ if (nir_src_is_dynamically_uniform(instr->src[0])) {
+ const fs_reg sample_id = bld.emit_uniformize(sample_src);
+ const fs_reg msg_data = vgrf(glsl_type::uint_type);
+ bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u));
+ emit_pixel_interpolater_send(bld,
+ FS_OPCODE_INTERPOLATE_AT_SAMPLE,
+ dst_xy,
+ fs_reg(), /* src */
+ msg_data,
+ interpolation);
+ } else {
+ /* Make a loop that sends a message to the pixel interpolater
+ * for the sample number in each live channel. If there are
+ * multiple channels with the same sample number then these
+ * will be handled simultaneously with a single interation of
+ * the loop.
+ */
+ bld.emit(BRW_OPCODE_DO);
+
+ /* Get the next live sample number into sample_id_reg */
+ const fs_reg sample_id = bld.emit_uniformize(sample_src);
+
+ /* Set the flag register so that we can perform the send
+ * message on all channels that have the same sample number
+ */
+ bld.CMP(bld.null_reg_ud(),
+ sample_src, sample_id,
+ BRW_CONDITIONAL_EQ);
+ const fs_reg msg_data = vgrf(glsl_type::uint_type);
+ bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u));
+ fs_inst *inst =
+ emit_pixel_interpolater_send(bld,
+ FS_OPCODE_INTERPOLATE_AT_SAMPLE,
+ dst_xy,
+ fs_reg(), /* src */
+ msg_data,
+ interpolation);
+ set_predicate(BRW_PREDICATE_NORMAL, inst);
+
+ /* Continue the loop if there are any live channels left */
+ set_predicate_inv(BRW_PREDICATE_NORMAL,
+ true, /* inverse */
+ bld.emit(BRW_OPCODE_WHILE));
+ }
+ }
+
break;
}
@@ -1615,10 +1665,14 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
unsigned off_x = MIN2((int)(const_offset->f[0] * 16), 7) & 0xf;
unsigned off_y = MIN2((int)(const_offset->f[1] * 16), 7) & 0xf;
- inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, src,
- fs_reg(off_x | (off_y << 4)));
+ emit_pixel_interpolater_send(bld,
+ FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
+ dst_xy,
+ fs_reg(), /* src */
+ fs_reg(off_x | (off_y << 4)),
+ interpolation);
} else {
- src = vgrf(glsl_type::ivec2_type);
+ fs_reg src = vgrf(glsl_type::ivec2_type);
fs_reg offset_src = retype(get_nir_src(instr->src[0]),
BRW_REGISTER_TYPE_F);
for (int i = 0; i < 2; i++) {
@@ -1646,9 +1700,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
bld.SEL(offset(src, bld, i), itemp, fs_reg(7)));
}
- mlen = 2 * dispatch_width / 8;
- inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, src,
- fs_reg(0u));
+ const enum opcode opcode = FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET;
+ emit_pixel_interpolater_send(bld,
+ opcode,
+ dst_xy,
+ src,
+ fs_reg(0u),
+ interpolation);
}
break;
}
@@ -1657,12 +1715,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
unreachable("Invalid intrinsic");
}
- inst->mlen = mlen;
- /* 2 floats per slot returned */
- inst->regs_written = 2 * dispatch_width / 8;
- inst->pi_noperspective = instr->variables[0]->var->data.interpolation ==
- INTERP_QUALIFIER_NOPERSPECTIVE;
-
for (unsigned j = 0; j < instr->num_components; j++) {
fs_reg src = interp_reg(instr->variables[0]->var->data.location, j);
src.type = dest.type;
@@ -1684,18 +1736,18 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
nir_const_value *const_uniform_block =
nir_src_as_const_value(instr->src[1]);
if (const_uniform_block) {
- unsigned index = stage_prog_data->binding_table.ubo_start +
+ unsigned index = stage_prog_data->binding_table.ssbo_start +
const_uniform_block->u[0];
surf_index = fs_reg(index);
brw_mark_surface_used(prog_data, index);
} else {
surf_index = vgrf(glsl_type::uint_type);
bld.ADD(surf_index, get_nir_src(instr->src[1]),
- fs_reg(stage_prog_data->binding_table.ubo_start));
+ fs_reg(stage_prog_data->binding_table.ssbo_start));
surf_index = bld.emit_uniformize(surf_index);
brw_mark_surface_used(prog_data,
- stage_prog_data->binding_table.ubo_start +
+ stage_prog_data->binding_table.ssbo_start +
nir->info.num_ssbos - 1);
}
@@ -1780,17 +1832,17 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
case nir_intrinsic_ssbo_atomic_add:
nir_emit_ssbo_atomic(bld, BRW_AOP_ADD, instr);
break;
- case nir_intrinsic_ssbo_atomic_min:
- if (dest.type == BRW_REGISTER_TYPE_D)
- nir_emit_ssbo_atomic(bld, BRW_AOP_IMIN, instr);
- else
- nir_emit_ssbo_atomic(bld, BRW_AOP_UMIN, instr);
+ case nir_intrinsic_ssbo_atomic_imin:
+ nir_emit_ssbo_atomic(bld, BRW_AOP_IMIN, instr);
break;
- case nir_intrinsic_ssbo_atomic_max:
- if (dest.type == BRW_REGISTER_TYPE_D)
- nir_emit_ssbo_atomic(bld, BRW_AOP_IMAX, instr);
- else
- nir_emit_ssbo_atomic(bld, BRW_AOP_UMAX, instr);
+ case nir_intrinsic_ssbo_atomic_umin:
+ nir_emit_ssbo_atomic(bld, BRW_AOP_UMIN, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_imax:
+ nir_emit_ssbo_atomic(bld, BRW_AOP_IMAX, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_umax:
+ nir_emit_ssbo_atomic(bld, BRW_AOP_UMAX, instr);
break;
case nir_intrinsic_ssbo_atomic_and:
nir_emit_ssbo_atomic(bld, BRW_AOP_AND, instr);
@@ -1810,7 +1862,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
case nir_intrinsic_get_buffer_size: {
nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
- unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
+ unsigned ssbo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
int reg_width = dispatch_width / 8;
/* Set LOD = 0 */
@@ -1821,7 +1873,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
BRW_REGISTER_TYPE_UD);
bld.LOAD_PAYLOAD(src_payload, &source, 1, 0);
- fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start + ubo_index);
+ fs_reg surf_index = fs_reg(prog_data->binding_table.ssbo_start + ssbo_index);
fs_inst *inst = bld.emit(FS_OPCODE_GET_BUFFER_SIZE, dest,
src_payload, surf_index);
inst->header_size = 0;
@@ -1874,20 +1926,20 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
fs_reg surface;
nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]);
if (const_surface) {
- unsigned surf_index = stage_prog_data->binding_table.ubo_start +
+ unsigned surf_index = stage_prog_data->binding_table.ssbo_start +
const_surface->u[0];
surface = fs_reg(surf_index);
brw_mark_surface_used(prog_data, surf_index);
} else {
surface = vgrf(glsl_type::uint_type);
bld.ADD(surface, get_nir_src(instr->src[0]),
- fs_reg(stage_prog_data->binding_table.ubo_start));
+ fs_reg(stage_prog_data->binding_table.ssbo_start));
- /* Assume this may touch any UBO. This is the same we do for other
+ /* Assume this may touch any SSBO. This is the same we do for other
* UBO/SSBO accesses with non-constant surface.
*/
brw_mark_surface_used(prog_data,
- stage_prog_data->binding_table.ubo_start +
+ stage_prog_data->binding_table.ssbo_start +
nir->info.num_ssbos - 1);
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index c3a037be4b1..36388fad98d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -27,7 +27,7 @@
#include "brw_fs.h"
#include "brw_cfg.h"
-#include "glsl/glsl_types.h"
+#include "glsl/nir/glsl_types.h"
#include "glsl/ir_optimization.h"
using namespace brw;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
index e406c2899e8..8792a8c7b1d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
@@ -52,11 +52,12 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block)
ip--;
if (inst->opcode != BRW_OPCODE_MOV ||
+ !inst->saturate ||
inst->dst.file != GRF ||
+ inst->dst.type != inst->src[0].type ||
inst->src[0].file != GRF ||
inst->src[0].abs ||
- inst->src[0].negate ||
- !inst->saturate)
+ inst->src[0].negate)
continue;
int src_var = v->live_intervals->var_from_reg(inst->src[0]);
@@ -65,7 +66,9 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block)
bool interfered = false;
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst, block) {
if (scan_inst->overwrites_reg(inst->src[0])) {
- if (scan_inst->is_partial_write())
+ if (scan_inst->is_partial_write() ||
+ (scan_inst->dst.type != inst->dst.type &&
+ !scan_inst->can_change_types()))
break;
if (scan_inst->saturate) {
@@ -73,6 +76,12 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block)
progress = true;
} else if (src_end_ip <= ip || inst->dst.equals(inst->src[0])) {
if (scan_inst->can_do_saturate()) {
+ if (scan_inst->dst.type != inst->dst.type) {
+ scan_inst->dst.type = inst->dst.type;
+ for (int i = 0; i < scan_inst->sources; i++) {
+ scan_inst->src[i].type = inst->dst.type;
+ }
+ }
scan_inst->saturate = true;
inst->saturate = false;
progress = true;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_validate.cpp b/src/mesa/drivers/dri/i965/brw_fs_validate.cpp
index d0e04f3bf47..814c551f1be 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_validate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_validate.cpp
@@ -32,7 +32,7 @@
#define fsv_assert(cond) \
if (!(cond)) { \
- fprintf(stderr, "ASSERT: FS validation failed!\n"); \
+ fprintf(stderr, "ASSERT: Scalar %s validation failed!\n", stage_abbrev); \
dump_instruction(inst, stderr); \
fprintf(stderr, "%s:%d: %s\n", __FILE__, __LINE__, #cond); \
abort(); \
diff --git a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
index 6000e35b9b9..cab5af318a2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
@@ -42,7 +42,7 @@
#include "glsl/ir.h"
#include "glsl/ir_visitor.h"
#include "glsl/ir_rvalue_visitor.h"
-#include "glsl/glsl_types.h"
+#include "glsl/nir/glsl_types.h"
#include "util/hash_table.h"
static bool debug = false;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index df1a7ed9b59..f825fed4daf 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -43,7 +43,7 @@
#include "brw_vec4.h"
#include "brw_fs.h"
#include "main/uniforms.h"
-#include "glsl/glsl_types.h"
+#include "glsl/nir/glsl_types.h"
#include "glsl/ir_optimization.h"
#include "program/sampler.h"
@@ -53,7 +53,8 @@ fs_reg *
fs_visitor::emit_vs_system_value(int location)
{
fs_reg *reg = new(this->mem_ctx)
- fs_reg(ATTR, VERT_ATTRIB_MAX, BRW_REGISTER_TYPE_D);
+ fs_reg(ATTR, 4 * _mesa_bitcount_64(nir->info.inputs_read),
+ BRW_REGISTER_TYPE_D);
brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data;
switch (location) {
@@ -903,12 +904,9 @@ fs_visitor::emit_urb_writes()
urb_offset = 0;
flush = false;
for (slot = 0; slot < vue_map->num_slots; slot++) {
- fs_reg reg, src, zero;
-
int varying = vue_map->slot_to_varying[slot];
switch (varying) {
- case VARYING_SLOT_PSIZ:
-
+ case VARYING_SLOT_PSIZ: {
/* The point size varying slot is the vue header and is always in the
* vue map. But often none of the special varyings that live there
* are written and in that case we can skip writing to the vue
@@ -920,7 +918,7 @@ fs_visitor::emit_urb_writes()
break;
}
- zero = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
+ fs_reg zero(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
bld.MOV(zero, fs_reg(0u));
sources[length++] = zero;
@@ -939,7 +937,7 @@ fs_visitor::emit_urb_writes()
else
sources[length++] = zero;
break;
-
+ }
case BRW_VARYING_SLOT_NDC:
case VARYING_SLOT_EDGE:
unreachable("unexpected scalar vs output");
@@ -972,8 +970,8 @@ fs_visitor::emit_urb_writes()
* temp register and use that for the payload.
*/
for (int i = 0; i < 4; i++) {
- reg = fs_reg(GRF, alloc.allocate(1), outputs[varying].type);
- src = offset(this->outputs[varying], bld, i);
+ fs_reg reg = fs_reg(GRF, alloc.allocate(1), outputs[varying].type);
+ fs_reg src = offset(this->outputs[varying], bld, i);
set_saturate(true, bld.MOV(reg, src));
sources[length++] = reg;
}
@@ -1069,7 +1067,7 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
const void *key,
struct brw_stage_prog_data *prog_data,
struct gl_program *prog,
- nir_shader *shader,
+ const nir_shader *shader,
unsigned dispatch_width,
int shader_time_index)
: backend_shader(compiler, log_data, mem_ctx, shader, prog_data),
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index e0165fb4a23..10a7f28fdab 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -57,6 +57,7 @@ brw_codegen_gs_prog(struct brw_context *brw,
struct brw_geometry_program *gp,
struct brw_gs_prog_key *key)
{
+ struct gl_shader *shader = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
struct brw_stage_state *stage_state = &brw->gs.base;
struct brw_gs_compile c;
memset(&c, 0, sizeof(c));
@@ -300,8 +301,11 @@ brw_codegen_gs_prog(struct brw_context *brw,
void *mem_ctx = ralloc_context(NULL);
unsigned program_size;
+ char *error_str;
const unsigned *program =
- brw_gs_emit(brw, prog, &c, mem_ctx, st_index, &program_size);
+ brw_compile_gs(brw->intelScreen->compiler, brw, &c,
+ shader->Program->nir, prog,
+ mem_ctx, st_index, &program_size, &error_str);
if (program == NULL) {
ralloc_free(mem_ctx);
return false;
diff --git a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
index 0bb307432d0..00125c0f405 100644
--- a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
@@ -129,7 +129,7 @@ brw_upload_gs_image_surfaces(struct brw_context *brw)
ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
if (prog) {
- /* BRW_NEW_GS_PROG_DATA, BRW_NEW_IMAGE_UNITS */
+ /* BRW_NEW_GS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_GEOMETRY],
&brw->gs.base, &brw->gs.prog_data->base.base);
}
@@ -137,6 +137,7 @@ brw_upload_gs_image_surfaces(struct brw_context *brw)
const struct brw_tracked_state brw_gs_image_surfaces = {
.dirty = {
+ .mesa = _NEW_TEXTURE,
.brw = BRW_NEW_BATCH |
BRW_NEW_GEOMETRY_PROGRAM |
BRW_NEW_GS_PROG_DATA |
diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h
index 97c6f8b2500..7726e4b78a0 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h
@@ -204,6 +204,7 @@ public:
unsigned components_read(unsigned i) const;
int regs_read(int arg) const;
bool can_do_source_mods(const struct brw_device_info *devinfo);
+ bool can_change_types() const;
bool has_side_effects() const;
bool reads_flag() const;
diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
index 96dd633e117..1b57b65db27 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
@@ -179,6 +179,7 @@ public:
int swizzle, int swizzle_mask);
void reswizzle(int dst_writemask, int swizzle);
bool can_do_source_mods(const struct brw_device_info *devinfo);
+ bool can_change_types() const;
bool reads_flag()
{
diff --git a/src/mesa/drivers/dri/i965/brw_lower_unnormalized_offset.cpp b/src/mesa/drivers/dri/i965/brw_lower_unnormalized_offset.cpp
index 8c59b9e415b..4219d471def 100644
--- a/src/mesa/drivers/dri/i965/brw_lower_unnormalized_offset.cpp
+++ b/src/mesa/drivers/dri/i965/brw_lower_unnormalized_offset.cpp
@@ -31,7 +31,7 @@
* \author Chris Forbes <[email protected]>
*/
-#include "glsl/glsl_types.h"
+#include "glsl/nir/glsl_types.h"
#include "glsl/ir.h"
#include "glsl/ir_builder.h"
diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
index eb201736c6e..fbde3f04204 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
@@ -451,6 +451,11 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
if (irb->mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_NO_MCS)
clear_type = REP_CLEAR;
+ if (brw->gen >= 9 && clear_type == FAST_CLEAR) {
+ perf_debug("fast MCS clears are disabled on gen9");
+ clear_type = REP_CLEAR;
+ }
+
/* We can't do scissored fast clears because of the restrictions on the
* fast clear rectangle size.
*/
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 0a9c09f1075..dc497770914 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -27,30 +27,112 @@
#include "glsl/nir/glsl_to_nir.h"
#include "program/prog_to_nir.h"
+static bool
+remap_vs_attrs(nir_block *block, void *closure)
+{
+ GLbitfield64 inputs_read = *((GLbitfield64 *) closure);
+
+ nir_foreach_instr(block, instr) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+ /* We set EmitNoIndirect for VS inputs, so there are no indirects. */
+ assert(intrin->intrinsic != nir_intrinsic_load_input_indirect);
+
+ if (intrin->intrinsic == nir_intrinsic_load_input) {
+ /* Attributes come in a contiguous block, ordered by their
+ * gl_vert_attrib value. That means we can compute the slot
+ * number for an attribute by masking out the enabled attributes
+ * before it and counting the bits.
+ */
+ int attr = intrin->const_index[0];
+ int slot = _mesa_bitcount_64(inputs_read & BITFIELD64_MASK(attr));
+ intrin->const_index[0] = 4 * slot;
+ }
+ }
+ return true;
+}
+
static void
brw_nir_lower_inputs(nir_shader *nir, bool is_scalar)
{
switch (nir->stage) {
+ case MESA_SHADER_VERTEX:
+ /* For now, leave the vec4 backend doing the old method. */
+ if (!is_scalar) {
+ nir_assign_var_locations(&nir->inputs, &nir->num_inputs,
+ type_size_vec4);
+ break;
+ }
+
+ /* Start with the location of the variable's base. */
+ foreach_list_typed(nir_variable, var, node, &nir->inputs) {
+ var->data.driver_location = var->data.location;
+ }
+
+ /* Now use nir_lower_io to walk dereference chains. Attribute arrays
+ * are loaded as one vec4 per element (or matrix column), so we use
+ * type_size_vec4 here.
+ */
+ nir_lower_io(nir, nir_var_shader_in, type_size_vec4);
+
+ /* Finally, translate VERT_ATTRIB_* values into the actual registers.
+ *
+ * Note that we can use nir->info.inputs_read instead of key->inputs_read
+ * since the two are identical aside from Gen4-5 edge flag differences.
+ */
+ GLbitfield64 inputs_read = nir->info.inputs_read;
+ nir_foreach_overload(nir, overload) {
+ if (overload->impl) {
+ nir_foreach_block(overload->impl, remap_vs_attrs, &inputs_read);
+ }
+ }
+ break;
case MESA_SHADER_GEOMETRY:
foreach_list_typed(nir_variable, var, node, &nir->inputs) {
var->data.driver_location = var->data.location;
}
break;
- default:
+ case MESA_SHADER_FRAGMENT:
+ assert(is_scalar);
nir_assign_var_locations(&nir->inputs, &nir->num_inputs,
- is_scalar ? type_size_scalar : type_size_vec4);
+ type_size_scalar);
+ break;
+ case MESA_SHADER_COMPUTE:
+ /* Compute shaders have no inputs. */
+ assert(exec_list_is_empty(&nir->inputs));
break;
+ default:
+ unreachable("unsupported shader stage");
}
}
static void
brw_nir_lower_outputs(nir_shader *nir, bool is_scalar)
{
- if (is_scalar) {
- nir_assign_var_locations(&nir->outputs, &nir->num_outputs, type_size_scalar);
- } else {
- nir_foreach_variable(var, &nir->outputs)
- var->data.driver_location = var->data.location;
+ switch (nir->stage) {
+ case MESA_SHADER_VERTEX:
+ case MESA_SHADER_GEOMETRY:
+ if (is_scalar) {
+ nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
+ type_size_scalar);
+ } else {
+ nir_foreach_variable(var, &nir->outputs)
+ var->data.driver_location = var->data.location;
+ }
+ break;
+ case MESA_SHADER_FRAGMENT:
+ nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
+ type_size_scalar);
+ break;
+ case MESA_SHADER_COMPUTE:
+ /* Compute shaders have no outputs. */
+ assert(exec_list_is_empty(&nir->outputs));
+ break;
+ default:
+ unreachable("unsupported shader stage");
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index dbd0e50228b..22b0227756e 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -69,8 +69,7 @@ static struct gl_program *brwNewProgram( struct gl_context *ctx,
if (prog) {
prog->id = get_new_program_id(brw->intelScreen);
- return _mesa_init_vertex_program( ctx, &prog->program,
- target, id );
+ return _mesa_init_gl_program(&prog->program.Base, target, id);
}
else
return NULL;
@@ -81,8 +80,7 @@ static struct gl_program *brwNewProgram( struct gl_context *ctx,
if (prog) {
prog->id = get_new_program_id(brw->intelScreen);
- return _mesa_init_fragment_program( ctx, &prog->program,
- target, id );
+ return _mesa_init_gl_program(&prog->program.Base, target, id);
}
else
return NULL;
@@ -93,7 +91,7 @@ static struct gl_program *brwNewProgram( struct gl_context *ctx,
if (prog) {
prog->id = get_new_program_id(brw->intelScreen);
- return _mesa_init_geometry_program(ctx, &prog->program, target, id);
+ return _mesa_init_gl_program(&prog->program, target, id);
} else {
return NULL;
}
@@ -104,7 +102,7 @@ static struct gl_program *brwNewProgram( struct gl_context *ctx,
if (prog) {
prog->id = get_new_program_id(brw->intelScreen);
- return _mesa_init_compute_program(ctx, &prog->program, target, id);
+ return _mesa_init_gl_program(&prog->program.Base, target, id);
} else {
return NULL;
}
diff --git a/src/mesa/drivers/dri/i965/brw_program.h b/src/mesa/drivers/dri/i965/brw_program.h
index cf0522a8b10..f8cf2b062c8 100644
--- a/src/mesa/drivers/dri/i965/brw_program.h
+++ b/src/mesa/drivers/dri/i965/brw_program.h
@@ -24,129 +24,7 @@
#ifndef BRW_PROGRAM_H
#define BRW_PROGRAM_H
-/**
- * Program key structures.
- *
- * When drawing, we look for the currently bound shaders in the program
- * cache. This is essentially a hash table lookup, and these are the keys.
- *
- * Sometimes OpenGL features specified as state need to be simulated via
- * shader code, due to a mismatch between the API and the hardware. This
- * is often referred to as "non-orthagonal state" or "NOS". We store NOS
- * in the program key so it's considered when searching for a program. If
- * we haven't seen a particular combination before, we have to recompile a
- * new specialized version.
- *
- * Shader compilation should not look up state in gl_context directly, but
- * instead use the copy in the program key. This guarantees recompiles will
- * happen correctly.
- *
- * @{
- */
-
-enum PACKED gen6_gather_sampler_wa {
- WA_SIGN = 1, /* whether we need to sign extend */
- WA_8BIT = 2, /* if we have an 8bit format needing wa */
- WA_16BIT = 4, /* if we have a 16bit format needing wa */
-};
-
-/**
- * Sampler information needed by VS, WM, and GS program cache keys.
- */
-struct brw_sampler_prog_key_data {
- /**
- * EXT_texture_swizzle and DEPTH_TEXTURE_MODE swizzles.
- */
- uint16_t swizzles[MAX_SAMPLERS];
-
- uint32_t gl_clamp_mask[3];
-
- /**
- * For RG32F, gather4's channel select is broken.
- */
- uint32_t gather_channel_quirk_mask;
-
- /**
- * Whether this sampler uses the compressed multisample surface layout.
- */
- uint32_t compressed_multisample_layout_mask;
-
- /**
- * For Sandybridge, which shader w/a we need for gather quirks.
- */
- enum gen6_gather_sampler_wa gen6_gather_wa[MAX_SAMPLERS];
-};
-
-
-/** The program key for Vertex Shaders. */
-struct brw_vs_prog_key {
- unsigned program_string_id;
-
- /*
- * Per-attribute workaround flags
- */
- uint8_t gl_attrib_wa_flags[VERT_ATTRIB_MAX];
-
- bool copy_edgeflag:1;
-
- bool clamp_vertex_color:1;
-
- /**
- * How many user clipping planes are being uploaded to the vertex shader as
- * push constants.
- *
- * These are used for lowering legacy gl_ClipVertex/gl_Position clipping to
- * clip distances.
- */
- unsigned nr_userclip_plane_consts:4;
-
- /**
- * For pre-Gen6 hardware, a bitfield indicating which texture coordinates
- * are going to be replaced with point coordinates (as a consequence of a
- * call to glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)). Because
- * our SF thread requires exact matching between VS outputs and FS inputs,
- * these texture coordinates will need to be unconditionally included in
- * the VUE, even if they aren't written by the vertex shader.
- */
- uint8_t point_coord_replace;
-
- struct brw_sampler_prog_key_data tex;
-};
-
-/** The program key for Geometry Shaders. */
-struct brw_gs_prog_key
-{
- unsigned program_string_id;
-
- struct brw_sampler_prog_key_data tex;
-};
-
-/** The program key for Fragment/Pixel Shaders. */
-struct brw_wm_prog_key {
- uint8_t iz_lookup;
- bool stats_wm:1;
- bool flat_shade:1;
- bool persample_shading:1;
- bool persample_2x:1;
- unsigned nr_color_regions:5;
- bool replicate_alpha:1;
- bool render_to_fbo:1;
- bool clamp_fragment_color:1;
- bool compute_pos_offset:1;
- bool compute_sample_id:1;
- unsigned line_aa:2;
- bool high_quality_derivatives:1;
-
- uint16_t drawable_height;
- uint64_t input_slots_valid;
- unsigned program_string_id;
- GLenum alpha_test_func; /* < For Gen4/5 MRT alpha test */
- float alpha_test_ref;
-
- struct brw_sampler_prog_key_data tex;
-};
-
-/** @} */
+#include "brw_compiler.h"
#ifdef __cplusplus
extern "C" {
diff --git a/src/mesa/drivers/dri/i965/brw_sampler_state.c b/src/mesa/drivers/dri/i965/brw_sampler_state.c
index c2db5f69560..6d73444dad0 100644
--- a/src/mesa/drivers/dri/i965/brw_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_sampler_state.c
@@ -44,6 +44,7 @@
#include "main/macros.h"
#include "main/samplerobj.h"
+#include "util/half_float.h"
/**
* Emit a 3DSTATE_SAMPLER_STATE_POINTERS_{VS,HS,GS,DS,PS} packet.
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index 4e43e5ccdbd..b710c60148c 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -29,7 +29,7 @@
#include "brw_vec4.h"
#include "brw_cfg.h"
#include "brw_shader.h"
-#include "glsl/glsl_types.h"
+#include "glsl/nir/glsl_types.h"
#include "glsl/ir_optimization.h"
using namespace brw;
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 3a58a58a00b..6be2a6e5b55 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -660,7 +660,7 @@ brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg)
backend_shader::backend_shader(const struct brw_compiler *compiler,
void *log_data,
void *mem_ctx,
- nir_shader *shader,
+ const nir_shader *shader,
struct brw_stage_prog_data *stage_prog_data)
: compiler(compiler),
log_data(log_data),
@@ -1131,11 +1131,16 @@ brw_assign_common_binding_table_offsets(gl_shader_stage stage,
next_binding_table_offset += num_textures;
if (shader) {
- assert(shader->NumUniformBlocks <= BRW_MAX_COMBINED_UBO_SSBO);
+ assert(shader->NumUniformBlocks <= BRW_MAX_UBO);
stage_prog_data->binding_table.ubo_start = next_binding_table_offset;
next_binding_table_offset += shader->NumUniformBlocks;
+
+ assert(shader->NumShaderStorageBlocks <= BRW_MAX_SSBO);
+ stage_prog_data->binding_table.ssbo_start = next_binding_table_offset;
+ next_binding_table_offset += shader->NumShaderStorageBlocks;
} else {
stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0;
+ stage_prog_data->binding_table.ssbo_start = 0xd0d0d0d0;
}
if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index ad2de5eae2d..b33b08f40d7 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -38,64 +38,6 @@
#define MAX_SAMPLER_MESSAGE_SIZE 11
#define MAX_VGRF_SIZE 16
-struct brw_compiler {
- const struct brw_device_info *devinfo;
-
- struct {
- struct ra_regs *regs;
-
- /**
- * Array of the ra classes for the unaligned contiguous register
- * block sizes used.
- */
- int *classes;
-
- /**
- * Mapping for register-allocated objects in *regs to the first
- * GRF for that object.
- */
- uint8_t *ra_reg_to_grf;
- } vec4_reg_set;
-
- struct {
- struct ra_regs *regs;
-
- /**
- * Array of the ra classes for the unaligned contiguous register
- * block sizes used, indexed by register size.
- */
- int classes[16];
-
- /**
- * Mapping from classes to ra_reg ranges. Each of the per-size
- * classes corresponds to a range of ra_reg nodes. This array stores
- * those ranges in the form of first ra_reg in each class and the
- * total number of ra_reg elements in the last array element. This
- * way the range of the i'th class is given by:
- * [ class_to_ra_reg_range[i], class_to_ra_reg_range[i+1] )
- */
- int class_to_ra_reg_range[17];
-
- /**
- * Mapping for register-allocated objects in *regs to the first
- * GRF for that object.
- */
- uint8_t *ra_reg_to_grf;
-
- /**
- * ra class for the aligned pairs we use for PLN, which doesn't
- * appear in *classes.
- */
- int aligned_pairs_class;
- } fs_reg_sets[2];
-
- void (*shader_debug_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
- void (*shader_perf_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
-
- bool scalar_vs;
- struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES];
-};
-
enum PACKED register_file {
BAD_FILE,
GRF,
@@ -225,7 +167,7 @@ protected:
backend_shader(const struct brw_compiler *compiler,
void *log_data,
void *mem_ctx,
- nir_shader *shader,
+ const nir_shader *shader,
struct brw_stage_prog_data *stage_prog_data);
public:
@@ -234,7 +176,7 @@ public:
void *log_data; /* Passed to compiler->*_log functions */
const struct brw_device_info * const devinfo;
- nir_shader *nir;
+ const nir_shader *nir;
struct brw_stage_prog_data * const stage_prog_data;
/** ralloc context for temporary data used during compile */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index e966b96a5ca..befc92445d3 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -280,6 +280,18 @@ vec4_instruction::can_do_source_mods(const struct brw_device_info *devinfo)
return true;
}
+bool
+vec4_instruction::can_change_types() const
+{
+ return dst.type == src[0].type &&
+ !src[0].abs && !src[0].negate && !saturate &&
+ (opcode == BRW_OPCODE_MOV ||
+ (opcode == BRW_OPCODE_SEL &&
+ dst.type == src[1].type &&
+ predicate != BRW_PREDICATE_NONE &&
+ !src[1].abs && !src[1].negate));
+}
+
/**
* Returns how many MRFs an opcode will write over.
*
@@ -1632,28 +1644,11 @@ vec4_vs_visitor::setup_attributes(int payload_reg)
*/
if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) {
attribute_map[VERT_ATTRIB_MAX] = payload_reg + nr_attributes;
- nr_attributes++;
}
lower_attributes_to_hw_regs(attribute_map, false /* interleaved */);
- /* The BSpec says we always have to read at least one thing from
- * the VF, and it appears that the hardware wedges otherwise.
- */
- if (nr_attributes == 0)
- nr_attributes = 1;
-
- prog_data->urb_read_length = (nr_attributes + 1) / 2;
-
- unsigned vue_entries =
- MAX2(nr_attributes, prog_data->vue_map.num_slots);
-
- if (devinfo->gen == 6)
- prog_data->urb_entry_size = ALIGN(vue_entries, 8) / 8;
- else
- prog_data->urb_entry_size = ALIGN(vue_entries, 4) / 4;
-
- return payload_reg + nr_attributes;
+ return payload_reg + vs_prog_data->nr_attributes;
}
int
@@ -1937,51 +1932,76 @@ extern "C" {
* Returns the final assembly and the program's size.
*/
const unsigned *
-brw_vs_emit(struct brw_context *brw,
- void *mem_ctx,
- const struct brw_vs_prog_key *key,
- struct brw_vs_prog_data *prog_data,
- struct gl_vertex_program *vp,
- struct gl_shader_program *prog,
- int shader_time_index,
- unsigned *final_assembly_size)
+brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
+ void *mem_ctx,
+ const struct brw_vs_prog_key *key,
+ struct brw_vs_prog_data *prog_data,
+ const nir_shader *shader,
+ gl_clip_plane *clip_planes,
+ bool use_legacy_snorm_formula,
+ int shader_time_index,
+ unsigned *final_assembly_size,
+ char **error_str)
{
const unsigned *assembly = NULL;
- if (brw->intelScreen->compiler->scalar_vs) {
+ unsigned nr_attributes = _mesa_bitcount_64(prog_data->inputs_read);
+
+ /* gl_VertexID and gl_InstanceID are system values, but arrive via an
+ * incoming vertex attribute. So, add an extra slot.
+ */
+ if (shader->info.system_values_read &
+ (BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
+ BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))) {
+ nr_attributes++;
+ }
+
+ /* The 3DSTATE_VS documentation lists the lower bound on "Vertex URB Entry
+ * Read Length" as 1 in vec4 mode, and 0 in SIMD8 mode. Empirically, in
+ * vec4 mode, the hardware appears to wedge unless we read something.
+ */
+ if (compiler->scalar_vs)
+ prog_data->base.urb_read_length = DIV_ROUND_UP(nr_attributes, 2);
+ else
+ prog_data->base.urb_read_length = DIV_ROUND_UP(MAX2(nr_attributes, 1), 2);
+
+ prog_data->nr_attributes = nr_attributes;
+
+ /* Since vertex shaders reuse the same VUE entry for inputs and outputs
+ * (overwriting the original contents), we need to make sure the size is
+ * the larger of the two.
+ */
+ const unsigned vue_entries =
+ MAX2(nr_attributes, (unsigned)prog_data->base.vue_map.num_slots);
+
+ if (compiler->devinfo->gen == 6)
+ prog_data->base.urb_entry_size = DIV_ROUND_UP(vue_entries, 8);
+ else
+ prog_data->base.urb_entry_size = DIV_ROUND_UP(vue_entries, 4);
+
+ if (compiler->scalar_vs) {
prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
- fs_visitor v(brw->intelScreen->compiler, brw,
- mem_ctx, key, &prog_data->base.base,
+ fs_visitor v(compiler, log_data, mem_ctx, key, &prog_data->base.base,
NULL, /* prog; Only used for TEXTURE_RECTANGLE on gen < 8 */
- vp->Base.nir, 8, shader_time_index);
- if (!v.run_vs(brw_select_clip_planes(&brw->ctx))) {
- if (prog) {
- prog->LinkStatus = false;
- ralloc_strcat(&prog->InfoLog, v.fail_msg);
- }
-
- _mesa_problem(NULL, "Failed to compile vertex shader: %s\n",
- v.fail_msg);
+ shader, 8, shader_time_index);
+ if (!v.run_vs(clip_planes)) {
+ if (error_str)
+ *error_str = ralloc_strdup(mem_ctx, v.fail_msg);
return NULL;
}
- fs_generator g(brw->intelScreen->compiler, brw,
- mem_ctx, (void *) key, &prog_data->base.base,
- &vp->Base, v.promoted_constants,
+ fs_generator g(compiler, log_data, mem_ctx, (void *) key,
+ &prog_data->base.base, v.promoted_constants,
v.runtime_check_aads_emit, "VS");
if (INTEL_DEBUG & DEBUG_VS) {
- char *name;
- if (prog) {
- name = ralloc_asprintf(mem_ctx, "%s vertex shader %d",
- prog->Label ? prog->Label : "unnamed",
- prog->Name);
- } else {
- name = ralloc_asprintf(mem_ctx, "vertex program %d",
- vp->Base.Id);
- }
- g.enable_debug(name);
+ const char *debug_name =
+ ralloc_asprintf(mem_ctx, "%s vertex shader %s",
+ shader->info.label ? shader->info.label : "unnamed",
+ shader->info.name);
+
+ g.enable_debug(debug_name);
}
g.generate_code(v.cfg, 8);
assembly = g.get_assembly(final_assembly_size);
@@ -1990,26 +2010,19 @@ brw_vs_emit(struct brw_context *brw,
if (!assembly) {
prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
- vec4_vs_visitor v(brw->intelScreen->compiler, brw, key, prog_data,
- vp->Base.nir, brw_select_clip_planes(&brw->ctx),
- mem_ctx, shader_time_index,
- !_mesa_is_gles3(&brw->ctx));
+ vec4_vs_visitor v(compiler, log_data, key, prog_data,
+ shader, clip_planes, mem_ctx,
+ shader_time_index, use_legacy_snorm_formula);
if (!v.run()) {
- if (prog) {
- prog->LinkStatus = false;
- ralloc_strcat(&prog->InfoLog, v.fail_msg);
- }
-
- _mesa_problem(NULL, "Failed to compile vertex shader: %s\n",
- v.fail_msg);
+ if (error_str)
+ *error_str = ralloc_strdup(mem_ctx, v.fail_msg);
return NULL;
}
- vec4_generator g(brw->intelScreen->compiler, brw,
- prog, &vp->Base, &prog_data->base,
+ vec4_generator g(compiler, log_data, &prog_data->base,
mem_ctx, INTEL_DEBUG & DEBUG_VS, "vertex", "VS");
- assembly = g.generate_assembly(v.cfg, final_assembly_size);
+ assembly = g.generate_assembly(v.cfg, final_assembly_size, shader);
}
return assembly;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 5e3500c0c9a..d861b2e85df 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -72,7 +72,7 @@ public:
void *log_data,
const struct brw_sampler_prog_key_data *key,
struct brw_vue_prog_data *prog_data,
- nir_shader *shader,
+ const nir_shader *shader,
void *mem_ctx,
bool no_spills,
int shader_time_index);
@@ -391,8 +391,6 @@ class vec4_generator
{
public:
vec4_generator(const struct brw_compiler *compiler, void *log_data,
- struct gl_shader_program *shader_prog,
- struct gl_program *prog,
struct brw_vue_prog_data *prog_data,
void *mem_ctx,
bool debug_flag,
@@ -400,10 +398,11 @@ public:
const char *stage_abbrev);
~vec4_generator();
- const unsigned *generate_assembly(const cfg_t *cfg, unsigned *asm_size);
+ const unsigned *generate_assembly(const cfg_t *cfg, unsigned *asm_size,
+ const nir_shader *nir);
private:
- void generate_code(const cfg_t *cfg);
+ void generate_code(const cfg_t *cfg, const nir_shader *nir);
void generate_math1_gen4(vec4_instruction *inst,
struct brw_reg dst,
@@ -485,9 +484,6 @@ private:
struct brw_codegen *p;
- struct gl_shader_program *shader_prog;
- const struct gl_program *prog;
-
struct brw_vue_prog_data *prog_data;
void *mem_ctx;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
index 610caef7dce..db99ecba35a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -256,18 +256,6 @@ try_constant_propagate(const struct brw_device_info *devinfo,
}
static bool
-can_change_source_types(vec4_instruction *inst)
-{
- return inst->dst.type == inst->src[0].type &&
- !inst->src[0].abs && !inst->src[0].negate && !inst->saturate &&
- (inst->opcode == BRW_OPCODE_MOV ||
- (inst->opcode == BRW_OPCODE_SEL &&
- inst->dst.type == inst->src[1].type &&
- inst->predicate != BRW_PREDICATE_NONE &&
- !inst->src[1].abs && !inst->src[1].negate));
-}
-
-static bool
try_copy_propagate(const struct brw_device_info *devinfo,
vec4_instruction *inst,
int arg, struct copy_entry *entry)
@@ -325,7 +313,7 @@ try_copy_propagate(const struct brw_device_info *devinfo,
if (has_source_modifiers &&
value.type != inst->src[arg].type &&
- !can_change_source_types(inst))
+ !inst->can_change_types())
return false;
if (has_source_modifiers &&
@@ -394,7 +382,7 @@ try_copy_propagate(const struct brw_device_info *devinfo,
value.swizzle = composed_swizzle;
if (has_source_modifiers &&
value.type != inst->src[arg].type) {
- assert(can_change_source_types(inst));
+ assert(inst->can_change_types());
for (int i = 0; i < 3; i++) {
inst->src[i].type = value.type;
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index dcacc900540..a84f6c47471 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -21,6 +21,7 @@
*/
#include <ctype.h>
+#include "glsl/glsl_parser_extras.h"
#include "brw_vec4.h"
#include "brw_cfg.h"
@@ -137,15 +138,13 @@ vec4_instruction::get_src(const struct brw_vue_prog_data *prog_data, int i)
vec4_generator::vec4_generator(const struct brw_compiler *compiler,
void *log_data,
- struct gl_shader_program *shader_prog,
- struct gl_program *prog,
struct brw_vue_prog_data *prog_data,
void *mem_ctx,
bool debug_flag,
const char *stage_name,
const char *stage_abbrev)
: compiler(compiler), log_data(log_data), devinfo(compiler->devinfo),
- shader_prog(shader_prog), prog(prog), prog_data(prog_data),
+ prog_data(prog_data),
mem_ctx(mem_ctx), stage_name(stage_name), stage_abbrev(stage_abbrev),
debug_flag(debug_flag)
{
@@ -1142,7 +1141,7 @@ vec4_generator::generate_set_simd4x2_header_gen9(vec4_instruction *inst,
}
void
-vec4_generator::generate_code(const cfg_t *cfg)
+vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir)
{
struct annotation_info annotation;
memset(&annotation, 0, sizeof(annotation));
@@ -1648,14 +1647,10 @@ vec4_generator::generate_code(const cfg_t *cfg)
int after_size = p->next_insn_offset;
if (unlikely(debug_flag)) {
- if (shader_prog) {
- fprintf(stderr, "Native code for %s %s shader %d:\n",
- shader_prog->Label ? shader_prog->Label : "unnamed",
- stage_name, shader_prog->Name);
- } else {
- fprintf(stderr, "Native code for %s program %d:\n", stage_name,
- prog->Id);
- }
+ fprintf(stderr, "Native code for %s %s shader %s:\n",
+ nir->info.label ? nir->info.label : "unnamed",
+ _mesa_shader_stage_to_string(nir->stage), nir->info.name);
+
fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. Compacted %d to %d"
" bytes (%.0f%%)\n",
stage_abbrev,
@@ -1663,7 +1658,7 @@ vec4_generator::generate_code(const cfg_t *cfg)
100.0f * (before_size - after_size) / before_size);
dump_assembly(p->store, annotation.ann_count, annotation.ann,
- p->devinfo, prog);
+ p->devinfo);
ralloc_free(annotation.ann);
}
@@ -1676,10 +1671,11 @@ vec4_generator::generate_code(const cfg_t *cfg)
const unsigned *
vec4_generator::generate_assembly(const cfg_t *cfg,
- unsigned *assembly_size)
+ unsigned *assembly_size,
+ const nir_shader *nir)
{
brw_set_default_access_mode(p, BRW_ALIGN_16);
- generate_code(cfg);
+ generate_code(cfg, nir);
return brw_get_program(p, assembly_size);
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index 4ce471e0669..a715cf5a6cb 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -30,14 +30,12 @@
#include "brw_vec4_gs_visitor.h"
#include "gen6_gs_visitor.h"
-const unsigned MAX_GS_INPUT_VERTICES = 6;
-
namespace brw {
vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler,
void *log_data,
struct brw_gs_compile *c,
- nir_shader *shader,
+ const nir_shader *shader,
void *mem_ctx,
bool no_spills,
int shader_time_index)
@@ -598,32 +596,17 @@ vec4_gs_visitor::gs_end_primitive()
emit(OR(dst_reg(this->control_data_bits), this->control_data_bits, mask));
}
-static const unsigned *
-generate_assembly(struct brw_context *brw,
- struct gl_shader_program *shader_prog,
- struct gl_program *prog,
- struct brw_vue_prog_data *prog_data,
- void *mem_ctx,
- const cfg_t *cfg,
- unsigned *final_assembly_size)
-{
- vec4_generator g(brw->intelScreen->compiler, brw,
- shader_prog, prog, prog_data, mem_ctx,
- INTEL_DEBUG & DEBUG_GS, "geometry", "GS");
- return g.generate_assembly(cfg, final_assembly_size);
-}
-
extern "C" const unsigned *
-brw_gs_emit(struct brw_context *brw,
- struct gl_shader_program *prog,
- struct brw_gs_compile *c,
- void *mem_ctx,
- int shader_time_index,
- unsigned *final_assembly_size)
+brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
+ struct brw_gs_compile *c,
+ const nir_shader *shader,
+ struct gl_shader_program *shader_prog,
+ void *mem_ctx,
+ int shader_time_index,
+ unsigned *final_assembly_size,
+ char **error_str)
{
- struct gl_shader *shader = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
-
- if (brw->gen >= 7) {
+ if (compiler->devinfo->gen >= 7) {
/* Compile the geometry shader in DUAL_OBJECT dispatch mode, if we can do
* so without spilling. If the GS invocations count > 1, then we can't use
* dual object mode.
@@ -632,13 +615,12 @@ brw_gs_emit(struct brw_context *brw,
likely(!(INTEL_DEBUG & DEBUG_NO_DUAL_OBJECT_GS))) {
c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
- vec4_gs_visitor v(brw->intelScreen->compiler, brw,
- c, shader->Program->nir,
+ vec4_gs_visitor v(compiler, log_data, c, shader,
mem_ctx, true /* no_spills */, shader_time_index);
if (v.run()) {
- return generate_assembly(brw, prog, &c->gp->program.Base,
- &c->prog_data.base, mem_ctx, v.cfg,
- final_assembly_size);
+ vec4_generator g(compiler, log_data, &c->prog_data.base, mem_ctx,
+ INTEL_DEBUG & DEBUG_GS, "geometry", "GS");
+ return g.generate_assembly(v.cfg, final_assembly_size, shader);
}
}
}
@@ -666,7 +648,7 @@ brw_gs_emit(struct brw_context *brw,
* mode is more performant when invocations > 1. Gen6 only supports
* SINGLE mode.
*/
- if (c->prog_data.invocations <= 1 || brw->gen < 7)
+ if (c->prog_data.invocations <= 1 || compiler->devinfo->gen < 7)
c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X1_SINGLE;
else
c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_INSTANCE;
@@ -674,24 +656,22 @@ brw_gs_emit(struct brw_context *brw,
vec4_gs_visitor *gs = NULL;
const unsigned *ret = NULL;
- if (brw->gen >= 7)
- gs = new vec4_gs_visitor(brw->intelScreen->compiler, brw,
- c, shader->Program->nir,
+ if (compiler->devinfo->gen >= 7)
+ gs = new vec4_gs_visitor(compiler, log_data, c, shader,
mem_ctx, false /* no_spills */,
shader_time_index);
else
- gs = new gen6_gs_visitor(brw->intelScreen->compiler, brw,
- c, prog, shader->Program->nir,
+ gs = new gen6_gs_visitor(compiler, log_data, c, shader_prog, shader,
mem_ctx, false /* no_spills */,
shader_time_index);
if (!gs->run()) {
- prog->LinkStatus = false;
- ralloc_strcat(&prog->InfoLog, gs->fail_msg);
+ if (error_str)
+ *error_str = ralloc_strdup(mem_ctx, gs->fail_msg);
} else {
- ret = generate_assembly(brw, prog, &c->gp->program.Base,
- &c->prog_data.base, mem_ctx, gs->cfg,
- final_assembly_size);
+ vec4_generator g(compiler, log_data, &c->prog_data.base, mem_ctx,
+ INTEL_DEBUG & DEBUG_GS, "geometry", "GS");
+ ret = g.generate_assembly(gs->cfg, final_assembly_size, shader);
}
delete gs;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
index 3ff195c3e68..c52552768c8 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
@@ -32,36 +32,6 @@
#include "brw_vec4.h"
-/**
- * Scratch data used when compiling a GLSL geometry shader.
- */
-struct brw_gs_compile
-{
- struct brw_gs_prog_key key;
- struct brw_gs_prog_data prog_data;
- struct brw_vue_map input_vue_map;
-
- struct brw_geometry_program *gp;
-
- unsigned control_data_bits_per_vertex;
- unsigned control_data_header_size_bits;
-};
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-const unsigned *brw_gs_emit(struct brw_context *brw,
- struct gl_shader_program *prog,
- struct brw_gs_compile *c,
- void *mem_ctx,
- int shader_time_index,
- unsigned *final_assembly_size);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
#ifdef __cplusplus
namespace brw {
@@ -71,7 +41,7 @@ public:
vec4_gs_visitor(const struct brw_compiler *compiler,
void *log_data,
struct brw_gs_compile *c,
- nir_shader *shader,
+ const nir_shader *shader,
void *mem_ctx,
bool no_spills,
int shader_time_index);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
index cc688ef8083..678237901f2 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
@@ -291,15 +291,15 @@ vec4_visitor::calculate_live_intervals()
struct block_data *bd = &live_intervals->block_data[block->num];
for (int i = 0; i < live_intervals->num_vars; i++) {
- if (BITSET_TEST(bd->livein, i)) {
- start[i] = MIN2(start[i], block->start_ip);
- end[i] = MAX2(end[i], block->start_ip);
- }
+ if (BITSET_TEST(bd->livein, i)) {
+ start[i] = MIN2(start[i], block->start_ip);
+ end[i] = MAX2(end[i], block->start_ip);
+ }
- if (BITSET_TEST(bd->liveout, i)) {
- start[i] = MIN2(start[i], block->end_ip);
- end[i] = MAX2(end[i], block->end_ip);
- }
+ if (BITSET_TEST(bd->liveout, i)) {
+ start[i] = MIN2(start[i], block->end_ip);
+ end[i] = MAX2(end[i], block->end_ip);
+ }
}
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 41bd80df377..ea1e3e7bbcf 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -423,10 +423,10 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
case nir_intrinsic_get_buffer_size: {
nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
- unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
+ unsigned ssbo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
- src_reg surf_index = src_reg(prog_data->base.binding_table.ubo_start +
- ubo_index);
+ src_reg surf_index = src_reg(prog_data->base.binding_table.ssbo_start +
+ ssbo_index);
dst_reg result_dst = get_nir_dest(instr->dest);
vec4_instruction *inst = new(mem_ctx)
vec4_instruction(VS_OPCODE_GET_BUFFER_SIZE, result_dst);
@@ -456,18 +456,18 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
nir_const_value *const_uniform_block =
nir_src_as_const_value(instr->src[1]);
if (const_uniform_block) {
- unsigned index = prog_data->base.binding_table.ubo_start +
+ unsigned index = prog_data->base.binding_table.ssbo_start +
const_uniform_block->u[0];
surf_index = src_reg(index);
brw_mark_surface_used(&prog_data->base, index);
} else {
surf_index = src_reg(this, glsl_type::uint_type);
emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[1], 1),
- src_reg(prog_data->base.binding_table.ubo_start)));
+ src_reg(prog_data->base.binding_table.ssbo_start)));
surf_index = emit_uniformize(surf_index);
brw_mark_surface_used(&prog_data->base,
- prog_data->base.binding_table.ubo_start +
+ prog_data->base.binding_table.ssbo_start +
nir->info.num_ssbos - 1);
}
@@ -599,7 +599,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
src_reg surf_index;
if (const_uniform_block) {
- unsigned index = prog_data->base.binding_table.ubo_start +
+ unsigned index = prog_data->base.binding_table.ssbo_start +
const_uniform_block->u[0];
surf_index = src_reg(index);
@@ -607,14 +607,14 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
} else {
surf_index = src_reg(this, glsl_type::uint_type);
emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[0], 1),
- src_reg(prog_data->base.binding_table.ubo_start)));
+ src_reg(prog_data->base.binding_table.ssbo_start)));
surf_index = emit_uniformize(surf_index);
/* Assume this may touch any UBO. It would be nice to provide
* a tighter bound, but the array information is already lowered away.
*/
brw_mark_surface_used(&prog_data->base,
- prog_data->base.binding_table.ubo_start +
+ prog_data->base.binding_table.ssbo_start +
nir->info.num_ssbos - 1);
}
@@ -645,17 +645,17 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
case nir_intrinsic_ssbo_atomic_add:
nir_emit_ssbo_atomic(BRW_AOP_ADD, instr);
break;
- case nir_intrinsic_ssbo_atomic_min:
- if (dest.type == BRW_REGISTER_TYPE_D)
- nir_emit_ssbo_atomic(BRW_AOP_IMIN, instr);
- else
- nir_emit_ssbo_atomic(BRW_AOP_UMIN, instr);
+ case nir_intrinsic_ssbo_atomic_imin:
+ nir_emit_ssbo_atomic(BRW_AOP_IMIN, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_umin:
+ nir_emit_ssbo_atomic(BRW_AOP_UMIN, instr);
break;
- case nir_intrinsic_ssbo_atomic_max:
- if (dest.type == BRW_REGISTER_TYPE_D)
- nir_emit_ssbo_atomic(BRW_AOP_IMAX, instr);
- else
- nir_emit_ssbo_atomic(BRW_AOP_UMAX, instr);
+ case nir_intrinsic_ssbo_atomic_imax:
+ nir_emit_ssbo_atomic(BRW_AOP_IMAX, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_umax:
+ nir_emit_ssbo_atomic(BRW_AOP_UMAX, instr);
break;
case nir_intrinsic_ssbo_atomic_and:
nir_emit_ssbo_atomic(BRW_AOP_AND, instr);
@@ -765,7 +765,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
*/
brw_mark_surface_used(&prog_data->base,
prog_data->base.binding_table.ubo_start +
- nir->info.num_ssbos - 1);
+ nir->info.num_ubos - 1);
}
unsigned const_offset = instr->const_index[0];
@@ -821,20 +821,20 @@ vec4_visitor::nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr)
src_reg surface;
nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]);
if (const_surface) {
- unsigned surf_index = prog_data->base.binding_table.ubo_start +
+ unsigned surf_index = prog_data->base.binding_table.ssbo_start +
const_surface->u[0];
surface = src_reg(surf_index);
brw_mark_surface_used(&prog_data->base, surf_index);
} else {
surface = src_reg(this, glsl_type::uint_type);
emit(ADD(dst_reg(surface), get_nir_src(instr->src[0]),
- src_reg(prog_data->base.binding_table.ubo_start)));
+ src_reg(prog_data->base.binding_table.ssbo_start)));
/* Assume this may touch any UBO. This is the same we do for other
* UBO/SSBO accesses with non-constant surface.
*/
brw_mark_surface_used(&prog_data->base,
- prog_data->base.binding_table.ubo_start +
+ prog_data->base.binding_table.ssbo_start +
nir->info.num_ssbos - 1);
}
@@ -1237,14 +1237,8 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
break;
case nir_op_b2i:
- emit(AND(dst, op[0], src_reg(1)));
- break;
-
case nir_op_b2f:
- op[0].type = BRW_REGISTER_TYPE_D;
- dst.type = BRW_REGISTER_TYPE_D;
- emit(AND(dst, op[0], src_reg(0x3f800000u)));
- dst.type = BRW_REGISTER_TYPE_F;
+ emit(MOV(dst, negate(op[0])));
break;
case nir_op_f2b:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 98ea9be6ee4..5be9c6a6b2d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1815,7 +1815,7 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
void *log_data,
const struct brw_sampler_prog_key_data *key_tex,
struct brw_vue_prog_data *prog_data,
- nir_shader *shader,
+ const nir_shader *shader,
void *mem_ctx,
bool no_spills,
int shader_time_index)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
index b6e1971c2ee..485a80ee2fc 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
@@ -301,7 +301,7 @@ vec4_vs_visitor::vec4_vs_visitor(const struct brw_compiler *compiler,
void *log_data,
const struct brw_vs_prog_key *key,
struct brw_vs_prog_data *vs_prog_data,
- nir_shader *shader,
+ const nir_shader *shader,
gl_clip_plane *clip_planes,
void *mem_ctx,
int shader_time_index,
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 38de98fab86..ba680a98f7e 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -31,6 +31,7 @@
#include "main/compiler.h"
+#include "main/context.h"
#include "brw_context.h"
#include "brw_vs.h"
#include "brw_util.h"
@@ -57,18 +58,6 @@ brw_codegen_vs_prog(struct brw_context *brw,
bool start_busy = false;
double start_time = 0;
- if (!vp->program.Base.nir) {
- /* Normally we generate NIR in LinkShader() or
- * ProgramStringNotify(), but Mesa's fixed-function vertex program
- * handling doesn't notify the driver at all. Just do it here, at
- * the last minute, even though it's lame.
- */
- assert(vp->program.Base.Id == 0 && prog == NULL);
- vp->program.Base.nir =
- brw_create_nir(brw, NULL, &vp->program.Base, MESA_SHADER_VERTEX,
- brw->intelScreen->compiler->scalar_vs);
- }
-
if (prog)
vs = (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
@@ -171,7 +160,7 @@ brw_codegen_vs_prog(struct brw_context *brw,
}
if (unlikely(INTEL_DEBUG & DEBUG_VS))
- brw_dump_ir("vertex", prog, &vs->base, &vp->program.Base);
+ brw_dump_ir("vertex", prog, vs ? &vs->base : NULL, &vp->program.Base);
int st_index = -1;
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
@@ -179,9 +168,20 @@ brw_codegen_vs_prog(struct brw_context *brw,
/* Emit GEN4 code.
*/
- program = brw_vs_emit(brw, mem_ctx, key, &prog_data,
- &vp->program, prog, st_index, &program_size);
+ char *error_str;
+ program = brw_compile_vs(brw->intelScreen->compiler, brw, mem_ctx, key,
+ &prog_data, vp->program.Base.nir,
+ brw_select_clip_planes(&brw->ctx),
+ !_mesa_is_gles3(&brw->ctx),
+ st_index, &program_size, &error_str);
if (program == NULL) {
+ if (prog) {
+ prog->LinkStatus = false;
+ ralloc_strcat(&prog->InfoLog, error_str);
+ }
+
+ _mesa_problem(NULL, "Failed to compile vertex shader: %s\n", error_str);
+
ralloc_free(mem_ctx);
return false;
}
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index f1242f61b33..bcb5e7b0b2a 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -54,14 +54,6 @@
extern "C" {
#endif
-const unsigned *brw_vs_emit(struct brw_context *brw,
- void *mem_ctx,
- const struct brw_vs_prog_key *key,
- struct brw_vs_prog_data *prog_data,
- struct gl_vertex_program *vp,
- struct gl_shader_program *shader_prog,
- int shader_time_index,
- unsigned *program_size);
void brw_vs_debug_recompile(struct brw_context *brw,
struct gl_shader_program *prog,
const struct brw_vs_prog_key *key);
@@ -88,7 +80,7 @@ public:
void *log_data,
const struct brw_vs_prog_key *key,
struct brw_vs_prog_data *vs_prog_data,
- nir_shader *shader,
+ const nir_shader *shader,
gl_clip_plane *clip_planes,
void *mem_ctx,
int shader_time_index,
diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
index 9bb48eb2e27..f65258a52a5 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
@@ -201,7 +201,7 @@ brw_upload_vs_image_surfaces(struct brw_context *brw)
ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX];
if (prog) {
- /* BRW_NEW_VS_PROG_DATA, BRW_NEW_IMAGE_UNITS */
+ /* BRW_NEW_VS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_VERTEX],
&brw->vs.base, &brw->vs.prog_data->base.base);
}
@@ -209,6 +209,7 @@ brw_upload_vs_image_surfaces(struct brw_context *brw)
const struct brw_tracked_state brw_vs_image_surfaces = {
.dirty = {
+ .mesa = _NEW_TEXTURE,
.brw = BRW_NEW_BATCH |
BRW_NEW_IMAGE_UNITS |
BRW_NEW_VERTEX_PROGRAM |
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 4d5e7f67bd6..5c49db9e63e 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -39,89 +39,6 @@
#include "util/ralloc.h"
-/**
- * Return a bitfield where bit n is set if barycentric interpolation mode n
- * (see enum brw_wm_barycentric_interp_mode) is needed by the fragment shader.
- */
-static unsigned
-brw_compute_barycentric_interp_modes(const struct brw_device_info *devinfo,
- bool shade_model_flat,
- bool persample_shading,
- nir_shader *shader)
-{
- unsigned barycentric_interp_modes = 0;
-
- nir_foreach_variable(var, &shader->inputs) {
- enum glsl_interp_qualifier interp_qualifier = var->data.interpolation;
- bool is_centroid = var->data.centroid && !persample_shading;
- bool is_sample = var->data.sample || persample_shading;
- bool is_gl_Color = (var->data.location == VARYING_SLOT_COL0) ||
- (var->data.location == VARYING_SLOT_COL1);
-
- /* Ignore WPOS and FACE, because they don't require interpolation. */
- if (var->data.location == VARYING_SLOT_POS ||
- var->data.location == VARYING_SLOT_FACE)
- continue;
-
- /* Determine the set (or sets) of barycentric coordinates needed to
- * interpolate this variable. Note that when
- * brw->needs_unlit_centroid_workaround is set, centroid interpolation
- * uses PIXEL interpolation for unlit pixels and CENTROID interpolation
- * for lit pixels, so we need both sets of barycentric coordinates.
- */
- if (interp_qualifier == INTERP_QUALIFIER_NOPERSPECTIVE) {
- if (is_centroid) {
- barycentric_interp_modes |=
- 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC;
- } else if (is_sample) {
- barycentric_interp_modes |=
- 1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC;
- }
- if ((!is_centroid && !is_sample) ||
- devinfo->needs_unlit_centroid_workaround) {
- barycentric_interp_modes |=
- 1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC;
- }
- } else if (interp_qualifier == INTERP_QUALIFIER_SMOOTH ||
- (!(shade_model_flat && is_gl_Color) &&
- interp_qualifier == INTERP_QUALIFIER_NONE)) {
- if (is_centroid) {
- barycentric_interp_modes |=
- 1 << BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC;
- } else if (is_sample) {
- barycentric_interp_modes |=
- 1 << BRW_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC;
- }
- if ((!is_centroid && !is_sample) ||
- devinfo->needs_unlit_centroid_workaround) {
- barycentric_interp_modes |=
- 1 << BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
- }
- }
- }
-
- return barycentric_interp_modes;
-}
-
-static uint8_t
-computed_depth_mode(struct gl_fragment_program *fp)
-{
- if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
- switch (fp->FragDepthLayout) {
- case FRAG_DEPTH_LAYOUT_NONE:
- case FRAG_DEPTH_LAYOUT_ANY:
- return BRW_PSCDEPTH_ON;
- case FRAG_DEPTH_LAYOUT_GREATER:
- return BRW_PSCDEPTH_ON_GE;
- case FRAG_DEPTH_LAYOUT_LESS:
- return BRW_PSCDEPTH_ON_LE;
- case FRAG_DEPTH_LAYOUT_UNCHANGED:
- return BRW_PSCDEPTH_OFF;
- }
- }
- return BRW_PSCDEPTH_OFF;
-}
-
static void
assign_fs_binding_table_offsets(const struct brw_device_info *devinfo,
const struct gl_shader_program *shader_prog,
@@ -166,15 +83,6 @@ brw_codegen_wm_prog(struct brw_context *brw,
fs = (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
memset(&prog_data, 0, sizeof(prog_data));
- /* key->alpha_test_func means simulating alpha testing via discards,
- * so the shader definitely kills pixels.
- */
- prog_data.uses_kill = fp->program.UsesKill || key->alpha_test_func;
- prog_data.uses_omask =
- fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
- prog_data.computed_depth_mode = computed_depth_mode(&fp->program);
-
- prog_data.early_fragment_tests = fs && fs->base.EarlyFragmentTests;
/* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
if (!prog)
@@ -209,12 +117,6 @@ brw_codegen_wm_prog(struct brw_context *brw,
&prog_data.base);
}
- prog_data.barycentric_interp_modes =
- brw_compute_barycentric_interp_modes(brw->intelScreen->devinfo,
- key->flat_shade,
- key->persample_shading,
- fp->program.Base.nir);
-
if (unlikely(brw->perf_debug)) {
start_busy = (brw->batch.last_bo &&
drm_intel_bo_busy(brw->batch.last_bo));
@@ -222,7 +124,7 @@ brw_codegen_wm_prog(struct brw_context *brw,
}
if (unlikely(INTEL_DEBUG & DEBUG_WM))
- brw_dump_ir("fragment", prog, &fs->base, &fp->program.Base);
+ brw_dump_ir("fragment", prog, fs ? &fs->base : NULL, &fp->program.Base);
int st_index8 = -1, st_index16 = -1;
if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
@@ -230,9 +132,19 @@ brw_codegen_wm_prog(struct brw_context *brw,
st_index16 = brw_get_shader_time_index(brw, prog, &fp->program.Base, ST_FS16);
}
- program = brw_wm_fs_emit(brw, mem_ctx, key, &prog_data,
- &fp->program, prog, st_index8, st_index16, &program_size);
+ char *error_str = NULL;
+ program = brw_compile_fs(brw->intelScreen->compiler, brw, mem_ctx,
+ key, &prog_data, fp->program.Base.nir,
+ &fp->program.Base, st_index8, st_index16,
+ brw->use_rep_send, &program_size, &error_str);
if (program == NULL) {
+ if (prog) {
+ prog->LinkStatus = false;
+ ralloc_strcat(&prog->InfoLog, error_str);
+ }
+
+ _mesa_problem(NULL, "Failed to compile fragment shader: %s\n", error_str);
+
ralloc_free(mem_ctx);
return false;
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index 6ee22b2f907..53a642ee8bb 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -61,21 +61,6 @@
extern "C" {
#endif
-/**
- * Compile a fragment shader.
- *
- * Returns the final assembly and the program's size.
- */
-const unsigned *brw_wm_fs_emit(struct brw_context *brw,
- void *mem_ctx,
- const struct brw_wm_prog_key *key,
- struct brw_wm_prog_data *prog_data,
- struct gl_fragment_program *fp,
- struct gl_shader_program *prog,
- int shader_time_index8,
- int shader_time_index16,
- unsigned *final_assembly_size);
-
GLboolean brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog);
struct gl_shader *brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type);
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index c671e23827e..6ebe6481c32 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -34,6 +34,7 @@
#include "main/blend.h"
#include "main/mtypes.h"
#include "main/samplerobj.h"
+#include "main/shaderimage.h"
#include "program/prog_parameter.h"
#include "main/framebuffer.h"
@@ -925,54 +926,53 @@ brw_upload_ubo_surfaces(struct brw_context *brw,
if (!shader)
return;
- uint32_t *surf_offsets =
+ uint32_t *ubo_surf_offsets =
&stage_state->surf_offset[prog_data->binding_table.ubo_start];
for (int i = 0; i < shader->NumUniformBlocks; i++) {
- struct intel_buffer_object *intel_bo;
+ struct gl_uniform_buffer_binding *binding =
+ &ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding];
- /* Because behavior for referencing outside of the binding's size in the
- * glBindBufferRange case is undefined, we can just bind the whole buffer
- * glBindBufferBase wants and be a correct implementation.
- */
- if (!shader->UniformBlocks[i].IsShaderStorage) {
- struct gl_uniform_buffer_binding *binding;
- binding =
- &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
- if (binding->BufferObject == ctx->Shared->NullBufferObj) {
- brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &surf_offsets[i]);
- } else {
- intel_bo = intel_buffer_object(binding->BufferObject);
- drm_intel_bo *bo =
- intel_bufferobj_buffer(brw, intel_bo,
- binding->Offset,
- binding->BufferObject->Size - binding->Offset);
- brw_create_constant_surface(brw, bo, binding->Offset,
- binding->BufferObject->Size - binding->Offset,
- &surf_offsets[i],
- dword_pitch);
- }
+ if (binding->BufferObject == ctx->Shared->NullBufferObj) {
+ brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
} else {
- struct gl_shader_storage_buffer_binding *binding;
- binding =
- &ctx->ShaderStorageBufferBindings[shader->UniformBlocks[i].Binding];
- if (binding->BufferObject == ctx->Shared->NullBufferObj) {
- brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &surf_offsets[i]);
- } else {
- intel_bo = intel_buffer_object(binding->BufferObject);
- drm_intel_bo *bo =
- intel_bufferobj_buffer(brw, intel_bo,
- binding->Offset,
- binding->BufferObject->Size - binding->Offset);
- brw_create_buffer_surface(brw, bo, binding->Offset,
- binding->BufferObject->Size - binding->Offset,
- &surf_offsets[i],
- dword_pitch);
- }
+ struct intel_buffer_object *intel_bo =
+ intel_buffer_object(binding->BufferObject);
+ drm_intel_bo *bo =
+ intel_bufferobj_buffer(brw, intel_bo,
+ binding->Offset,
+ binding->BufferObject->Size - binding->Offset);
+ brw_create_constant_surface(brw, bo, binding->Offset,
+ binding->BufferObject->Size - binding->Offset,
+ &ubo_surf_offsets[i],
+ dword_pitch);
+ }
+ }
+
+ uint32_t *ssbo_surf_offsets =
+ &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
+
+ for (int i = 0; i < shader->NumShaderStorageBlocks; i++) {
+ struct gl_shader_storage_buffer_binding *binding =
+ &ctx->ShaderStorageBufferBindings[shader->ShaderStorageBlocks[i]->Binding];
+
+ if (binding->BufferObject == ctx->Shared->NullBufferObj) {
+ brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
+ } else {
+ struct intel_buffer_object *intel_bo =
+ intel_buffer_object(binding->BufferObject);
+ drm_intel_bo *bo =
+ intel_bufferobj_buffer(brw, intel_bo,
+ binding->Offset,
+ binding->BufferObject->Size - binding->Offset);
+ brw_create_buffer_surface(brw, bo, binding->Offset,
+ binding->BufferObject->Size - binding->Offset,
+ &ssbo_surf_offsets[i],
+ dword_pitch);
}
}
- if (shader->NumUniformBlocks)
+ if (shader->NumUniformBlocks || shader->NumShaderStorageBlocks)
brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}
@@ -1112,7 +1112,7 @@ brw_upload_cs_image_surfaces(struct brw_context *brw)
ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
if (prog) {
- /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS */
+ /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
&brw->cs.base, &brw->cs.prog_data->base);
}
@@ -1120,7 +1120,7 @@ brw_upload_cs_image_surfaces(struct brw_context *brw)
const struct brw_tracked_state brw_cs_image_surfaces = {
.dirty = {
- .mesa = _NEW_PROGRAM,
+ .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
.brw = BRW_NEW_BATCH |
BRW_NEW_CS_PROG_DATA |
BRW_NEW_IMAGE_UNITS
@@ -1253,7 +1253,7 @@ update_image_surface(struct brw_context *brw,
uint32_t *surf_offset,
struct brw_image_param *param)
{
- if (u->_Valid) {
+ if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
struct gl_texture_object *obj = u->TexObj;
const unsigned format = get_image_format(brw, u->_ActualFormat, access);
@@ -1338,7 +1338,7 @@ brw_upload_wm_image_surfaces(struct brw_context *brw)
struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
if (prog) {
- /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS */
+ /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
&brw->wm.base, &brw->wm.prog_data->base);
}
@@ -1346,6 +1346,7 @@ brw_upload_wm_image_surfaces(struct brw_context *brw)
const struct brw_tracked_state brw_wm_image_surfaces = {
.dirty = {
+ .mesa = _NEW_TEXTURE,
.brw = BRW_NEW_BATCH |
BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_FS_PROG_DATA |
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
index 59a76559103..671a535a5bd 100644
--- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
@@ -31,8 +31,6 @@
#include "gen6_gs_visitor.h"
-const unsigned MAX_GS_INPUT_VERTICES = 6;
-
namespace brw {
void
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h
index e75d6aa10b8..d02c67d8a74 100644
--- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h
+++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h
@@ -39,7 +39,7 @@ public:
void *log_data,
struct brw_gs_compile *c,
struct gl_shader_program *prog,
- nir_shader *shader,
+ const nir_shader *shader,
void *mem_ctx,
bool no_spills,
int shader_time_index) :
diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c
index 497ecec8e45..8d6d3fe1d34 100644
--- a/src/mesa/drivers/dri/i965/gen7_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c
@@ -59,9 +59,7 @@ upload_gs_state(struct brw_context *brw)
OUT_BATCH(((ALIGN(stage_state->sampler_count, 4)/4) <<
GEN6_GS_SAMPLER_COUNT_SHIFT) |
((brw->gs.prog_data->base.base.binding_table.size_bytes / 4) <<
- GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
- (brw->is_haswell && prog_data->base.nr_image_params ?
- HSW_GS_UAV_ACCESS_ENABLE : 0));
+ GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
if (brw->gs.prog_data->base.base.total_scratch) {
OUT_RELOC(stage_state->scratch_bo,
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index b7e48585482..a18dc697651 100644
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -126,9 +126,7 @@ upload_vs_state(struct brw_context *brw)
((ALIGN(stage_state->sampler_count, 4)/4) <<
GEN6_VS_SAMPLER_COUNT_SHIFT) |
((brw->vs.prog_data->base.base.binding_table.size_bytes / 4) <<
- GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
- (brw->is_haswell && prog_data->base.nr_image_params ?
- HSW_VS_UAV_ACCESS_ENABLE : 0));
+ GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
if (prog_data->base.total_scratch) {
OUT_RELOC(stage_state->scratch_bo,
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index fd6dab5be8b..06d5e65786b 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -113,7 +113,14 @@ upload_wm_state(struct brw_context *brw)
else if (prog_data->base.nr_image_params)
dw1 |= GEN7_WM_EARLY_DS_CONTROL_PSEXEC;
- /* _NEW_BUFFERS | _NEW_COLOR */
+ /* The "UAV access enable" bits are unnecessary on HSW because they only
+ * seem to have an effect on the HW-assisted coherency mechanism which we
+ * don't need, and the rasterization-related UAV_ONLY flag and the
+ * DISPATCH_ENABLE bit can be set independently from it.
+ * C.f. gen8_upload_ps_extra().
+ *
+ * BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA | _NEW_BUFFERS | _NEW_COLOR
+ */
if (brw->is_haswell &&
!(brw_color_buffer_write_enabled(brw) || writes_depth) &&
prog_data->base.nr_image_params)
@@ -221,9 +228,6 @@ gen7_upload_ps_state(struct brw_context *brw,
_mesa_get_min_invocations_per_fragment(ctx, fp, false);
assert(min_inv_per_frag >= 1);
- if (brw->is_haswell && prog_data->base.nr_image_params)
- dw4 |= HSW_PS_UAV_ACCESS_ENABLE;
-
if (prog_data->prog_offset_16 || prog_data->no_8) {
dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
if (!prog_data->no_8 && min_inv_per_frag == 1) {
diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c
index 4195f4cf4a7..d766ca7bebf 100644
--- a/src/mesa/drivers/dri/i965/gen8_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c
@@ -52,9 +52,7 @@ gen8_upload_gs_state(struct brw_context *brw)
((ALIGN(stage_state->sampler_count, 4)/4) <<
GEN6_GS_SAMPLER_COUNT_SHIFT) |
((prog_data->base.binding_table.size_bytes / 4) <<
- GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
- (prog_data->base.nr_image_params ?
- HSW_GS_UAV_ACCESS_ENABLE : 0));
+ GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
if (brw->gs.prog_data->base.base.total_scratch) {
OUT_RELOC64(stage_state->scratch_bo,
diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c
index a686fed704f..8f0507413a7 100644
--- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
@@ -25,6 +25,7 @@
#include "program/program.h"
#include "brw_state.h"
#include "brw_defines.h"
+#include "brw_wm.h"
#include "intel_batchbuffer.h"
void
@@ -65,8 +66,33 @@ gen8_upload_ps_extra(struct brw_context *brw,
if (brw->gen >= 9 && prog_data->pulls_bary)
dw1 |= GEN9_PSX_SHADER_PULLS_BARY;
- if (_mesa_active_fragment_shader_has_atomic_ops(&brw->ctx) ||
- prog_data->base.nr_image_params)
+ /* The stricter cross-primitive coherency guarantees that the hardware
+ * gives us with the "Accesses UAV" bit set for at least one shader stage
+ * and the "UAV coherency required" bit set on the 3DPRIMITIVE command are
+ * redundant within the current image, atomic counter and SSBO GL APIs,
+ * which all have very loose ordering and coherency requirements and
+ * generally rely on the application to insert explicit barriers when a
+ * shader invocation is expected to see the memory writes performed by the
+ * invocations of some previous primitive. Regardless of the value of "UAV
+ * coherency required", the "Accesses UAV" bits will implicitly cause a DC
+ * flush, useless in most cases, when the lowermost stage with the bit set
+ * finishes execution.
+ *
+ * It would be nice to disable it, but in some cases we can't because on
+ * Gen8+ it also has an influence on rasterization via the PS UAV-only
+ * signal (which could be set independently from the coherency mechanism in
+ * the 3DSTATE_WM command on Gen7), and because in some cases it will
+ * determine whether the hardware skips execution of the fragment shader or
+ * not via the ThreadDispatchEnable signal. However if we know that
+ * GEN8_PS_BLEND_HAS_WRITEABLE_RT is going to be set and
+ * GEN8_PSX_PIXEL_SHADER_NO_RT_WRITE is not set it shouldn't make any
+ * difference so we may just disable it here.
+ *
+ * BRW_NEW_FS_PROG_DATA | BRW_NEW_FRAGMENT_PROGRAM | _NEW_BUFFERS | _NEW_COLOR
+ */
+ if ((_mesa_active_fragment_shader_has_atomic_ops(&brw->ctx) ||
+ prog_data->base.nr_image_params) &&
+ !brw_color_buffer_write_enabled(brw))
dw1 |= GEN8_PSX_SHADER_HAS_UAV;
BEGIN_BATCH(2);
@@ -91,7 +117,7 @@ upload_ps_extra(struct brw_context *brw)
const struct brw_tracked_state gen8_ps_extra = {
.dirty = {
- .mesa = 0,
+ .mesa = _NEW_BUFFERS | _NEW_COLOR,
.brw = BRW_NEW_CONTEXT |
BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_FS_PROG_DATA |
diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c
index e1e7704655d..18b86652fd2 100644
--- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
@@ -221,8 +221,8 @@ gen8_emit_texture_surface_state(struct brw_context *brw,
* "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN
* 16 must be used."
*/
- assert(brw->gen < 9 || mt->halign == 16);
- assert(brw->gen < 8 || mt->num_samples > 1 || mt->halign == 16);
+ if (brw->gen >= 9 || mt->num_samples == 1)
+ assert(mt->halign == 16);
}
const uint32_t surf_type = translate_tex_target(target);
@@ -470,8 +470,8 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
* "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN
* 16 must be used."
*/
- assert(brw->gen < 9 || mt->halign == 16);
- assert(brw->gen < 8 || mt->num_samples > 1 || mt->halign == 16);
+ if (brw->gen >= 9 || mt->num_samples == 1)
+ assert(mt->halign == 16);
}
uint32_t *surf = allocate_surface_state(brw, &offset, surf_index);
diff --git a/src/mesa/drivers/dri/i965/gen8_vs_state.c b/src/mesa/drivers/dri/i965/gen8_vs_state.c
index 8b5048bee7e..28f5adddf14 100644
--- a/src/mesa/drivers/dri/i965/gen8_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_vs_state.c
@@ -53,9 +53,7 @@ upload_vs_state(struct brw_context *brw)
((ALIGN(stage_state->sampler_count, 4) / 4) <<
GEN6_VS_SAMPLER_COUNT_SHIFT) |
((prog_data->base.binding_table.size_bytes / 4) <<
- GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
- (prog_data->base.nr_image_params ?
- HSW_VS_UAV_ACCESS_ENABLE : 0));
+ GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
if (prog_data->base.total_scratch) {
OUT_RELOC64(stage_state->scratch_bo,
diff --git a/src/mesa/drivers/dri/i965/intel_asm_annotation.c b/src/mesa/drivers/dri/i965/intel_asm_annotation.c
index bb8bb8d38c9..b3d6324a5fe 100644
--- a/src/mesa/drivers/dri/i965/intel_asm_annotation.c
+++ b/src/mesa/drivers/dri/i965/intel_asm_annotation.c
@@ -33,8 +33,7 @@
void
dump_assembly(void *assembly, int num_annotations, struct annotation *annotation,
- const struct brw_device_info *devinfo,
- const struct gl_program *prog)
+ const struct brw_device_info *devinfo)
{
const char *last_annotation_string = NULL;
const void *last_annotation_ir = NULL;
@@ -57,19 +56,7 @@ dump_assembly(void *assembly, int num_annotations, struct annotation *annotation
last_annotation_ir = annotation[i].ir;
if (last_annotation_ir) {
fprintf(stderr, " ");
- if (prog->nir)
- nir_print_instr(annotation[i].ir, stderr);
- else if (!prog->Instructions)
- fprint_ir(stderr, annotation[i].ir);
- else {
- const struct prog_instruction *pi =
- (const struct prog_instruction *)annotation[i].ir;
- fprintf(stderr, "%d: ",
- (int)(pi - prog->Instructions));
- _mesa_fprint_instruction_opt(stderr,
- pi,
- 0, PROG_PRINT_DEBUG, NULL);
- }
+ nir_print_instr(annotation[i].ir, stderr);
fprintf(stderr, "\n");
}
}
diff --git a/src/mesa/drivers/dri/i965/intel_asm_annotation.h b/src/mesa/drivers/dri/i965/intel_asm_annotation.h
index d9c69bc41b0..6c72326f058 100644
--- a/src/mesa/drivers/dri/i965/intel_asm_annotation.h
+++ b/src/mesa/drivers/dri/i965/intel_asm_annotation.h
@@ -60,8 +60,7 @@ struct annotation_info {
void
dump_assembly(void *assembly, int num_annotations, struct annotation *annotation,
- const struct brw_device_info *devinfo,
- const struct gl_program *prog);
+ const struct brw_device_info *devinfo);
void
annotate(const struct brw_device_info *devinfo,
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index a169c41790e..b6e35205727 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -201,6 +201,14 @@ intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw,
if (brw->gen < 7)
return false;
+ if (brw->gen >= 9) {
+ /* FINISHME: Enable singlesample fast MCS clears on SKL after all GPU
+ * FINISHME: hangs are resolved.
+ */
+ perf_debug("singlesample fast MCS clears disabled on gen9");
+ return false;
+ }
+
if (mt->disable_aux_buffers)
return false;
diff --git a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
index 8adb626d420..5f80f90a91d 100644
--- a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
@@ -66,7 +66,7 @@ void cmod_propagation_test::SetUp()
v = new cmod_propagation_fs_visitor(compiler, prog_data, shader);
- _mesa_init_fragment_program(ctx, &fp->program, GL_FRAGMENT_SHADER, 0);
+ _mesa_init_gl_program(&fp->program.Base, GL_FRAGMENT_SHADER, 0);
devinfo->gen = 4;
}
diff --git a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp
index f77b18e7db8..32e8b8f8867 100644
--- a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp
@@ -66,7 +66,7 @@ void saturate_propagation_test::SetUp()
v = new saturate_propagation_fs_visitor(compiler, prog_data, shader);
- _mesa_init_fragment_program(ctx, &fp->program, GL_FRAGMENT_SHADER, 0);
+ _mesa_init_gl_program(&fp->program.Base, GL_FRAGMENT_SHADER, 0);
devinfo->gen = 4;
}
diff --git a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
index 40253961a65..e80b71b558d 100644
--- a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
@@ -98,7 +98,7 @@ void copy_propagation_test::SetUp()
v = new copy_propagation_vec4_visitor(compiler, shader);
- _mesa_init_vertex_program(ctx, &vp->program, GL_VERTEX_SHADER, 0);
+ _mesa_init_gl_program(&vp->program.Base, GL_VERTEX_SHADER, 0);
devinfo->gen = 4;
}
diff --git a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
index 76028d36311..2f824617454 100644
--- a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
+++ b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
@@ -101,7 +101,7 @@ void register_coalesce_test::SetUp()
v = new register_coalesce_vec4_visitor(compiler, shader);
- _mesa_init_vertex_program(ctx, &vp->program, GL_VERTEX_SHADER, 0);
+ _mesa_init_gl_program(&vp->program.Base, GL_VERTEX_SHADER, 0);
devinfo->gen = 4;
}
diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c
index d43eaf977fc..628c5708090 100644
--- a/src/mesa/drivers/dri/r200/r200_vertprog.c
+++ b/src/mesa/drivers/dri/r200/r200_vertprog.c
@@ -1200,18 +1200,19 @@ r200BindProgram(struct gl_context *ctx, GLenum target, struct gl_program *prog)
static struct gl_program *
r200NewProgram(struct gl_context *ctx, GLenum target, GLuint id)
{
- struct r200_vertex_program *vp;
-
switch(target){
- case GL_VERTEX_PROGRAM_ARB:
- vp = CALLOC_STRUCT(r200_vertex_program);
- return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id);
- case GL_FRAGMENT_PROGRAM_ARB:
- return _mesa_init_fragment_program( ctx, CALLOC_STRUCT(gl_fragment_program), target, id );
+ case GL_VERTEX_PROGRAM_ARB: {
+ struct r200_vertex_program *vp = CALLOC_STRUCT(r200_vertex_program);
+ return _mesa_init_gl_program(&vp->mesa_program.Base, target, id);
+ }
+ case GL_FRAGMENT_PROGRAM_ARB: {
+ struct gl_fragment_program *prog = CALLOC_STRUCT(gl_fragment_program);
+ return _mesa_init_gl_program(&prog->Base, target, id);
+ }
default:
_mesa_problem(ctx, "Bad target in r200NewProgram");
+ return NULL;
}
- return NULL;
}
diff --git a/src/mesa/drivers/x11/SConscript b/src/mesa/drivers/x11/SConscript
index d29f9874f44..cd5cccda0d1 100644
--- a/src/mesa/drivers/x11/SConscript
+++ b/src/mesa/drivers/x11/SConscript
@@ -4,6 +4,8 @@ env = env.Clone()
env.Append(CPPPATH = [
'#/src',
+ '#/src/glsl',
+ '#/src/glsl/nir',
'#/src/mapi',
'#/src/mesa',
'#/src/mesa/main',
diff --git a/src/mesa/main/blend.c b/src/mesa/main/blend.c
index dee5e29d5b8..20aa4980935 100644
--- a/src/mesa/main/blend.c
+++ b/src/mesa/main/blend.c
@@ -190,6 +190,19 @@ update_uses_dual_src(struct gl_context *ctx, int buf)
blend_factor_is_dual_src(ctx->Color.Blend[buf].DstA));
}
+
+/**
+ * Return the number of per-buffer blend states to update in
+ * glBlendFunc, glBlendFuncSeparate, glBlendEquation, etc.
+ */
+static inline unsigned
+num_buffers(const struct gl_context *ctx)
+{
+ return ctx->Extensions.ARB_draw_buffers_blend
+ ? ctx->Const.MaxDrawBuffers : 1;
+}
+
+
/**
* Set the separate blend source/dest factors for all draw buffers.
*
@@ -202,9 +215,10 @@ void GLAPIENTRY
_mesa_BlendFuncSeparate( GLenum sfactorRGB, GLenum dfactorRGB,
GLenum sfactorA, GLenum dfactorA )
{
- GLuint buf, numBuffers;
- GLboolean changed;
GET_CURRENT_CONTEXT(ctx);
+ const unsigned numBuffers = num_buffers(ctx);
+ unsigned buf;
+ bool changed = false;
if (MESA_VERBOSE & VERBOSE_API)
_mesa_debug(ctx, "glBlendFuncSeparate %s %s %s %s\n",
@@ -213,28 +227,38 @@ _mesa_BlendFuncSeparate( GLenum sfactorRGB, GLenum dfactorRGB,
_mesa_enum_to_string(sfactorA),
_mesa_enum_to_string(dfactorA));
- if (!validate_blend_factors(ctx, "glBlendFuncSeparate",
- sfactorRGB, dfactorRGB,
- sfactorA, dfactorA)) {
- return;
+ /* Check if we're really changing any state. If not, return early. */
+ if (ctx->Color._BlendFuncPerBuffer) {
+ /* Check all per-buffer states */
+ for (buf = 0; buf < numBuffers; buf++) {
+ if (ctx->Color.Blend[buf].SrcRGB != sfactorRGB ||
+ ctx->Color.Blend[buf].DstRGB != dfactorRGB ||
+ ctx->Color.Blend[buf].SrcA != sfactorA ||
+ ctx->Color.Blend[buf].DstA != dfactorA) {
+ changed = true;
+ break;
+ }
+ }
}
-
- numBuffers = ctx->Extensions.ARB_draw_buffers_blend
- ? ctx->Const.MaxDrawBuffers : 1;
-
- changed = GL_FALSE;
- for (buf = 0; buf < numBuffers; buf++) {
- if (ctx->Color.Blend[buf].SrcRGB != sfactorRGB ||
- ctx->Color.Blend[buf].DstRGB != dfactorRGB ||
- ctx->Color.Blend[buf].SrcA != sfactorA ||
- ctx->Color.Blend[buf].DstA != dfactorA) {
- changed = GL_TRUE;
- break;
+ else {
+ /* only need to check 0th per-buffer state */
+ if (ctx->Color.Blend[0].SrcRGB != sfactorRGB ||
+ ctx->Color.Blend[0].DstRGB != dfactorRGB ||
+ ctx->Color.Blend[0].SrcA != sfactorA ||
+ ctx->Color.Blend[0].DstA != dfactorA) {
+ changed = true;
}
}
+
if (!changed)
return;
+ if (!validate_blend_factors(ctx, "glBlendFuncSeparate",
+ sfactorRGB, dfactorRGB,
+ sfactorA, dfactorA)) {
+ return;
+ }
+
FLUSH_VERTICES(ctx, _NEW_COLOR);
for (buf = 0; buf < numBuffers; buf++) {
@@ -242,8 +266,13 @@ _mesa_BlendFuncSeparate( GLenum sfactorRGB, GLenum dfactorRGB,
ctx->Color.Blend[buf].DstRGB = dfactorRGB;
ctx->Color.Blend[buf].SrcA = sfactorA;
ctx->Color.Blend[buf].DstA = dfactorA;
- update_uses_dual_src(ctx, buf);
}
+
+ update_uses_dual_src(ctx, 0);
+ for (buf = 1; buf < numBuffers; buf++) {
+ ctx->Color.Blend[buf]._UsesDualSrc = ctx->Color.Blend[0]._UsesDualSrc;
+ }
+
ctx->Color._BlendFuncPerBuffer = GL_FALSE;
if (ctx->Driver.BlendFuncSeparate) {
@@ -283,18 +312,18 @@ _mesa_BlendFuncSeparateiARB(GLuint buf, GLenum sfactorRGB, GLenum dfactorRGB,
return;
}
- if (!validate_blend_factors(ctx, "glBlendFuncSeparatei",
- sfactorRGB, dfactorRGB,
- sfactorA, dfactorA)) {
- return;
- }
-
if (ctx->Color.Blend[buf].SrcRGB == sfactorRGB &&
ctx->Color.Blend[buf].DstRGB == dfactorRGB &&
ctx->Color.Blend[buf].SrcA == sfactorA &&
ctx->Color.Blend[buf].DstA == dfactorA)
return; /* no change */
+ if (!validate_blend_factors(ctx, "glBlendFuncSeparatei",
+ sfactorRGB, dfactorRGB,
+ sfactorA, dfactorA)) {
+ return;
+ }
+
FLUSH_VERTICES(ctx, _NEW_COLOR);
ctx->Color.Blend[buf].SrcRGB = sfactorRGB;
@@ -331,34 +360,43 @@ legal_blend_equation(const struct gl_context *ctx, GLenum mode)
void GLAPIENTRY
_mesa_BlendEquation( GLenum mode )
{
- GLuint buf, numBuffers;
- GLboolean changed;
GET_CURRENT_CONTEXT(ctx);
+ const unsigned numBuffers = num_buffers(ctx);
+ unsigned buf;
+ bool changed = false;
if (MESA_VERBOSE & VERBOSE_API)
_mesa_debug(ctx, "glBlendEquation(%s)\n",
_mesa_enum_to_string(mode));
- if (!legal_blend_equation(ctx, mode)) {
- _mesa_error(ctx, GL_INVALID_ENUM, "glBlendEquation");
- return;
+ if (ctx->Color._BlendEquationPerBuffer) {
+ /* Check all per-buffer states */
+ for (buf = 0; buf < numBuffers; buf++) {
+ if (ctx->Color.Blend[buf].EquationRGB != mode ||
+ ctx->Color.Blend[buf].EquationA != mode) {
+ changed = true;
+ break;
+ }
+ }
}
-
- numBuffers = ctx->Extensions.ARB_draw_buffers_blend
- ? ctx->Const.MaxDrawBuffers : 1;
-
- changed = GL_FALSE;
- for (buf = 0; buf < numBuffers; buf++) {
- if (ctx->Color.Blend[buf].EquationRGB != mode ||
- ctx->Color.Blend[buf].EquationA != mode) {
- changed = GL_TRUE;
- break;
+ else {
+ /* only need to check 0th per-buffer state */
+ if (ctx->Color.Blend[0].EquationRGB != mode ||
+ ctx->Color.Blend[0].EquationA != mode) {
+ changed = true;
}
}
+
if (!changed)
return;
+ if (!legal_blend_equation(ctx, mode)) {
+ _mesa_error(ctx, GL_INVALID_ENUM, "glBlendEquation");
+ return;
+ }
+
FLUSH_VERTICES(ctx, _NEW_COLOR);
+
for (buf = 0; buf < numBuffers; buf++) {
ctx->Color.Blend[buf].EquationRGB = mode;
ctx->Color.Blend[buf].EquationA = mode;
@@ -383,7 +421,7 @@ _mesa_BlendEquationiARB(GLuint buf, GLenum mode)
buf, _mesa_enum_to_string(mode));
if (buf >= ctx->Const.MaxDrawBuffers) {
- _mesa_error(ctx, GL_INVALID_VALUE, "glBlendFuncSeparatei(buffer=%u)",
+ _mesa_error(ctx, GL_INVALID_VALUE, "glBlendEquationi(buffer=%u)",
buf);
return;
}
@@ -407,15 +445,37 @@ _mesa_BlendEquationiARB(GLuint buf, GLenum mode)
void GLAPIENTRY
_mesa_BlendEquationSeparate( GLenum modeRGB, GLenum modeA )
{
- GLuint buf, numBuffers;
- GLboolean changed;
GET_CURRENT_CONTEXT(ctx);
+ const unsigned numBuffers = num_buffers(ctx);
+ unsigned buf;
+ bool changed = false;
if (MESA_VERBOSE & VERBOSE_API)
_mesa_debug(ctx, "glBlendEquationSeparateEXT(%s %s)\n",
_mesa_enum_to_string(modeRGB),
_mesa_enum_to_string(modeA));
+ if (ctx->Color._BlendEquationPerBuffer) {
+ /* Check all per-buffer states */
+ for (buf = 0; buf < numBuffers; buf++) {
+ if (ctx->Color.Blend[buf].EquationRGB != modeRGB ||
+ ctx->Color.Blend[buf].EquationA != modeA) {
+ changed = true;
+ break;
+ }
+ }
+ }
+ else {
+ /* only need to check 0th per-buffer state */
+ if (ctx->Color.Blend[0].EquationRGB != modeRGB ||
+ ctx->Color.Blend[0].EquationA != modeA) {
+ changed = true;
+ }
+ }
+
+ if (!changed)
+ return;
+
if ( (modeRGB != modeA) && !ctx->Extensions.EXT_blend_equation_separate ) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"glBlendEquationSeparateEXT not supported by driver");
@@ -432,21 +492,8 @@ _mesa_BlendEquationSeparate( GLenum modeRGB, GLenum modeA )
return;
}
- numBuffers = ctx->Extensions.ARB_draw_buffers_blend
- ? ctx->Const.MaxDrawBuffers : 1;
-
- changed = GL_FALSE;
- for (buf = 0; buf < numBuffers; buf++) {
- if (ctx->Color.Blend[buf].EquationRGB != modeRGB ||
- ctx->Color.Blend[buf].EquationA != modeA) {
- changed = GL_TRUE;
- break;
- }
- }
- if (!changed)
- return;
-
FLUSH_VERTICES(ctx, _NEW_COLOR);
+
for (buf = 0; buf < numBuffers; buf++) {
ctx->Color.Blend[buf].EquationRGB = modeRGB;
ctx->Color.Blend[buf].EquationA = modeA;
diff --git a/src/mesa/main/es1_conversion.c b/src/mesa/main/es1_conversion.c
index b254a6ef1c7..1dfe8278e71 100644
--- a/src/mesa/main/es1_conversion.c
+++ b/src/mesa/main/es1_conversion.c
@@ -1,3 +1,4 @@
+
#include <stdbool.h>
#include "api_loopback.h"
@@ -326,7 +327,24 @@ _mesa_GetTexEnvxv(GLenum target, GLenum pname, GLfixed *params)
}
break;
case GL_TEXTURE_ENV:
- if (pname != GL_TEXTURE_ENV_COLOR && pname != GL_RGB_SCALE && pname != GL_ALPHA_SCALE && pname != GL_TEXTURE_ENV_MODE && pname != GL_COMBINE_RGB && pname != GL_COMBINE_ALPHA && pname != GL_SRC0_RGB && pname != GL_SRC1_RGB && pname != GL_SRC2_RGB && pname != GL_SRC0_ALPHA && pname != GL_SRC1_ALPHA && pname != GL_SRC2_ALPHA && pname != GL_OPERAND0_RGB && pname != GL_OPERAND1_RGB && pname != GL_OPERAND2_RGB && pname != GL_OPERAND0_ALPHA && pname != GL_OPERAND1_ALPHA && pname != GL_OPERAND2_ALPHA) {
+ if (pname != GL_TEXTURE_ENV_COLOR &&
+ pname != GL_RGB_SCALE &&
+ pname != GL_ALPHA_SCALE &&
+ pname != GL_TEXTURE_ENV_MODE &&
+ pname != GL_COMBINE_RGB &&
+ pname != GL_COMBINE_ALPHA &&
+ pname != GL_SRC0_RGB &&
+ pname != GL_SRC1_RGB &&
+ pname != GL_SRC2_RGB &&
+ pname != GL_SRC0_ALPHA &&
+ pname != GL_SRC1_ALPHA &&
+ pname != GL_SRC2_ALPHA &&
+ pname != GL_OPERAND0_RGB &&
+ pname != GL_OPERAND1_RGB &&
+ pname != GL_OPERAND2_RGB &&
+ pname != GL_OPERAND0_ALPHA &&
+ pname != GL_OPERAND1_ALPHA &&
+ pname != GL_OPERAND2_ALPHA) {
_mesa_error(_mesa_get_current_context(), GL_INVALID_ENUM,
"glGetTexEnvxv(target=0x%x)", target);
return;
diff --git a/src/mesa/main/ff_fragment_shader.cpp b/src/mesa/main/ff_fragment_shader.cpp
index e4e2a18c1da..e63d0f1ec55 100644
--- a/src/mesa/main/ff_fragment_shader.cpp
+++ b/src/mesa/main/ff_fragment_shader.cpp
@@ -40,7 +40,7 @@
#include "glsl/ir_optimization.h"
#include "glsl/glsl_parser_extras.h"
#include "glsl/glsl_symbol_table.h"
-#include "glsl/glsl_types.h"
+#include "glsl/nir/glsl_types.h"
#include "program/ir_to_mesa.h"
#include "program/program.h"
#include "program/programopt.h"
@@ -975,13 +975,11 @@ static void load_texture( texenv_fragment_program *p, GLuint unit )
ir_var_uniform);
p->top_instructions->push_head(sampler);
- /* Set the texture unit for this sampler. The linker will pick this value
- * up and do-the-right-thing.
- *
- * NOTE: The cast to int is important. Without it, the constant will have
- * type uint, and things later on may get confused.
+ /* Set the texture unit for this sampler in the same way that
+ * layout(binding=X) would.
*/
- sampler->constant_value = new(p->mem_ctx) ir_constant(int(unit));
+ sampler->data.explicit_binding = true;
+ sampler->data.binding = unit;
deref = new(p->mem_ctx) ir_dereference_variable(sampler);
tex->set_sampler(deref, glsl_type::vec4_type);
diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c
index a6183b47e2e..34cc9218add 100644
--- a/src/mesa/main/ffvertex_prog.c
+++ b/src/mesa/main/ffvertex_prog.c
@@ -1690,11 +1690,10 @@ _mesa_get_fixed_func_vertex_program(struct gl_context *ctx)
ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS,
ctx->Const.Program[MESA_SHADER_VERTEX].MaxTemps );
-#if 0
if (ctx->Driver.ProgramStringNotify)
ctx->Driver.ProgramStringNotify( ctx, GL_VERTEX_PROGRAM_ARB,
&prog->Base );
-#endif
+
_mesa_program_cache_insert(ctx, ctx->VertexProgram.Cache,
&key, sizeof(key), &prog->Base);
}
diff --git a/src/mesa/main/format_utils.h b/src/mesa/main/format_utils.h
index 618f43d0aaa..378997b38b2 100644
--- a/src/mesa/main/format_utils.h
+++ b/src/mesa/main/format_utils.h
@@ -34,6 +34,7 @@
#include "imports.h"
#include "macros.h"
#include "util/rounding.h"
+#include "util/half_float.h"
extern const mesa_array_format RGBA32_FLOAT;
extern const mesa_array_format RGBA8_UBYTE;
diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c
index 350e6752c8b..230ebbc67f4 100644
--- a/src/mesa/main/imports.c
+++ b/src/mesa/main/imports.c
@@ -307,154 +307,6 @@ _mesa_bitcount_64(uint64_t n)
}
#endif
-
-/**
- * Convert a 4-byte float to a 2-byte half float.
- *
- * Not all float32 values can be represented exactly as a float16 value. We
- * round such intermediate float32 values to the nearest float16. When the
- * float32 lies exactly between to float16 values, we round to the one with
- * an even mantissa.
- *
- * This rounding behavior has several benefits:
- * - It has no sign bias.
- *
- * - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's
- * GPU ISA.
- *
- * - By reproducing the behavior of the GPU (at least on Intel hardware),
- * compile-time evaluation of constant packHalf2x16 GLSL expressions will
- * result in the same value as if the expression were executed on the GPU.
- */
-GLhalfARB
-_mesa_float_to_half(float val)
-{
- const fi_type fi = {val};
- const int flt_m = fi.i & 0x7fffff;
- const int flt_e = (fi.i >> 23) & 0xff;
- const int flt_s = (fi.i >> 31) & 0x1;
- int s, e, m = 0;
- GLhalfARB result;
-
- /* sign bit */
- s = flt_s;
-
- /* handle special cases */
- if ((flt_e == 0) && (flt_m == 0)) {
- /* zero */
- /* m = 0; - already set */
- e = 0;
- }
- else if ((flt_e == 0) && (flt_m != 0)) {
- /* denorm -- denorm float maps to 0 half */
- /* m = 0; - already set */
- e = 0;
- }
- else if ((flt_e == 0xff) && (flt_m == 0)) {
- /* infinity */
- /* m = 0; - already set */
- e = 31;
- }
- else if ((flt_e == 0xff) && (flt_m != 0)) {
- /* NaN */
- m = 1;
- e = 31;
- }
- else {
- /* regular number */
- const int new_exp = flt_e - 127;
- if (new_exp < -14) {
- /* The float32 lies in the range (0.0, min_normal16) and is rounded
- * to a nearby float16 value. The result will be either zero, subnormal,
- * or normal.
- */
- e = 0;
- m = _mesa_lroundevenf((1 << 24) * fabsf(fi.f));
- }
- else if (new_exp > 15) {
- /* map this value to infinity */
- /* m = 0; - already set */
- e = 31;
- }
- else {
- /* The float32 lies in the range
- * [min_normal16, max_normal16 + max_step16)
- * and is rounded to a nearby float16 value. The result will be
- * either normal or infinite.
- */
- e = new_exp + 15;
- m = _mesa_lroundevenf(flt_m / (float) (1 << 13));
- }
- }
-
- assert(0 <= m && m <= 1024);
- if (m == 1024) {
- /* The float32 was rounded upwards into the range of the next exponent,
- * so bump the exponent. This correctly handles the case where f32
- * should be rounded up to float16 infinity.
- */
- ++e;
- m = 0;
- }
-
- result = (s << 15) | (e << 10) | m;
- return result;
-}
-
-
-/**
- * Convert a 2-byte half float to a 4-byte float.
- * Based on code from:
- * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html
- */
-float
-_mesa_half_to_float(GLhalfARB val)
-{
- /* XXX could also use a 64K-entry lookup table */
- const int m = val & 0x3ff;
- const int e = (val >> 10) & 0x1f;
- const int s = (val >> 15) & 0x1;
- int flt_m, flt_e, flt_s;
- fi_type fi;
- float result;
-
- /* sign bit */
- flt_s = s;
-
- /* handle special cases */
- if ((e == 0) && (m == 0)) {
- /* zero */
- flt_m = 0;
- flt_e = 0;
- }
- else if ((e == 0) && (m != 0)) {
- /* denorm -- denorm half will fit in non-denorm single */
- const float half_denorm = 1.0f / 16384.0f; /* 2^-14 */
- float mantissa = ((float) (m)) / 1024.0f;
- float sign = s ? -1.0f : 1.0f;
- return sign * mantissa * half_denorm;
- }
- else if ((e == 31) && (m == 0)) {
- /* infinity */
- flt_e = 0xff;
- flt_m = 0;
- }
- else if ((e == 31) && (m != 0)) {
- /* NaN */
- flt_e = 0xff;
- flt_m = 1;
- }
- else {
- /* regular */
- flt_e = e + 112;
- flt_m = m << 13;
- }
-
- fi.i = (flt_s << 31) | (flt_e << 23) | flt_m;
- result = fi.f;
- return result;
-}
-
/*@}*/
diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h
index 90247587be3..042147fd8bb 100644
--- a/src/mesa/main/imports.h
+++ b/src/mesa/main/imports.h
@@ -396,13 +396,6 @@ _mesa_flsll(uint64_t n)
#endif
}
-
-extern GLhalfARB
-_mesa_float_to_half(float f);
-
-extern float
-_mesa_half_to_float(GLhalfARB h);
-
static inline bool
_mesa_half_is_negative(GLhalfARB h)
{
diff --git a/src/mesa/main/matrix.c b/src/mesa/main/matrix.c
index 2b8016a4a72..5ff5ac5bfe1 100644
--- a/src/mesa/main/matrix.c
+++ b/src/mesa/main/matrix.c
@@ -151,7 +151,6 @@ _mesa_MatrixMode( GLenum mode )
if (ctx->Transform.MatrixMode == mode && mode != GL_TEXTURE)
return;
- FLUSH_VERTICES(ctx, _NEW_TRANSFORM);
switch (mode) {
case GL_MODELVIEW:
diff --git a/src/mesa/main/mipmap.c b/src/mesa/main/mipmap.c
index ab16c2854a8..50469956c6e 100644
--- a/src/mesa/main/mipmap.c
+++ b/src/mesa/main/mipmap.c
@@ -37,6 +37,7 @@
#include "texstore.h"
#include "image.h"
#include "macros.h"
+#include "util/half_float.h"
#include "../../gallium/auxiliary/util/u_format_rgb9e5.h"
#include "../../gallium/auxiliary/util/u_format_r11g11b10f.h"
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index cbfb15522f0..e57b98a412d 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -42,7 +42,7 @@
#include "main/config.h"
#include "glapi/glapi.h"
#include "math/m_matrix.h" /* GLmatrix */
-#include "glsl/shader_enums.h"
+#include "glsl/nir/shader_enums.h"
#include "main/formats.h" /* MESA_FORMAT_COUNT */
@@ -94,11 +94,6 @@ struct vbo_context;
#define PRIM_OUTSIDE_BEGIN_END (PRIM_MAX + 1)
#define PRIM_UNKNOWN (PRIM_MAX + 2)
-#define VARYING_SLOT_MAX (VARYING_SLOT_VAR0 + MAX_VARYING)
-#define VARYING_SLOT_PATCH0 (VARYING_SLOT_MAX)
-#define VARYING_SLOT_TESS_MAX (VARYING_SLOT_PATCH0 + MAX_VARYING)
-#define FRAG_RESULT_MAX (FRAG_RESULT_DATA0 + MAX_DRAW_BUFFERS)
-
/**
* Determine if the given gl_varying_slot appears in the fragment shader.
*/
@@ -487,26 +482,24 @@ struct gl_colorbuffer_attrib
struct gl_current_attrib
{
/**
- * \name Current vertex attributes.
+ * \name Current vertex attributes (color, texcoords, etc).
* \note Values are valid only after FLUSH_VERTICES has been called.
* \note Index and Edgeflag current values are stored as floats in the
* SIX and SEVEN attribute slots.
+ * \note We need double storage for 64-bit vertex attributes
*/
- /* we need double storage for this for vertex attrib 64bit */
- GLfloat Attrib[VERT_ATTRIB_MAX][4*2]; /**< Position, color, texcoords, etc */
+ GLfloat Attrib[VERT_ATTRIB_MAX][4*2];
/**
- * \name Current raster position attributes (always valid).
- * \note This set of attributes is very similar to the SWvertex struct.
+ * \name Current raster position attributes (always up to date after a
+ * glRasterPos call).
*/
- /*@{*/
GLfloat RasterPos[4];
GLfloat RasterDistance;
GLfloat RasterColor[4];
GLfloat RasterSecondaryColor[4];
GLfloat RasterTexCoords[MAX_TEXTURE_COORD_UNITS][4];
GLboolean RasterPosValid;
- /*@}*/
};
@@ -1866,24 +1859,6 @@ typedef enum
/**
- * \brief Layout qualifiers for gl_FragDepth.
- *
- * Extension AMD_conservative_depth allows gl_FragDepth to be redeclared with
- * a layout qualifier.
- *
- * \see enum ir_depth_layout
- */
-enum gl_frag_depth_layout
-{
- FRAG_DEPTH_LAYOUT_NONE, /**< No layout is specified. */
- FRAG_DEPTH_LAYOUT_ANY,
- FRAG_DEPTH_LAYOUT_GREATER,
- FRAG_DEPTH_LAYOUT_LESS,
- FRAG_DEPTH_LAYOUT_UNCHANGED
-};
-
-
-/**
* Base class for any kind of program object
*/
struct gl_program
@@ -2286,12 +2261,34 @@ struct gl_shader
unsigned num_combined_uniform_components;
/**
- * This shader's uniform block information.
+ * This shader's uniform/ssbo block information.
*
* These fields are only set post-linking.
+ *
+ * BufferInterfaceBlocks is a list containing both UBOs and SSBOs. This is
+ * useful during the linking process so that we don't have to handle SSBOs
+ * specifically.
+ *
+ * UniformBlocks is a list of UBOs. This is useful for backends that need
+ * or prefer to see separate index spaces for UBOS and SSBOs like the GL
+ * API specifies.
+ *
+ * ShaderStorageBlocks is a list of SSBOs. This is useful for backends that
+ * need or prefer to see separate index spaces for UBOS and SSBOs like the
+ * GL API specifies.
+ *
+ * UniformBlocks and ShaderStorageBlocks only have pointers into
+ * BufferInterfaceBlocks so the actual resource information is not
+ * duplicated.
*/
+ unsigned NumBufferInterfaceBlocks;
+ struct gl_uniform_block *BufferInterfaceBlocks;
+
unsigned NumUniformBlocks;
- struct gl_uniform_block *UniformBlocks;
+ struct gl_uniform_block **UniformBlocks;
+
+ unsigned NumShaderStorageBlocks;
+ struct gl_uniform_block **ShaderStorageBlocks;
struct exec_list *ir;
struct exec_list *packed_varyings;
@@ -2694,8 +2691,33 @@ struct gl_shader_program
*/
unsigned LastClipDistanceArraySize;
+ /**
+ * This shader's uniform/ssbo block information.
+ *
+ * BufferInterfaceBlocks is a list containing both UBOs and SSBOs. This is
+ * useful during the linking process so that we don't have to handle SSBOs
+ * specifically.
+ *
+ * UniformBlocks is a list of UBOs. This is useful for backends that need
+ * or prefer to see separate index spaces for UBOS and SSBOs like the GL
+ * API specifies.
+ *
+ * ShaderStorageBlocks is a list of SSBOs. This is useful for backends that
+ * need or prefer to see separate index spaces for UBOS and SSBOs like the
+ * GL API specifies.
+ *
+ * UniformBlocks and ShaderStorageBlocks only have pointers into
+ * BufferInterfaceBlocks so the actual resource information is not
+ * duplicated and are only set after linking.
+ */
unsigned NumBufferInterfaceBlocks;
- struct gl_uniform_block *UniformBlocks;
+ struct gl_uniform_block *BufferInterfaceBlocks;
+
+ unsigned NumUniformBlocks;
+ struct gl_uniform_block **UniformBlocks;
+
+ unsigned NumShaderStorageBlocks;
+ struct gl_uniform_block **ShaderStorageBlocks;
/**
* Indices into the _LinkedShaders's UniformBlocks[] array for each stage
@@ -4076,13 +4098,6 @@ struct gl_image_unit
GLboolean Layered;
/**
- * GL_TRUE if the state of this image unit is valid and access from
- * the shader is allowed. Otherwise loads from this unit should
- * return zero and stores should have no effect.
- */
- GLboolean _Valid;
-
- /**
* Layer of the texture object bound to this unit as specified by the
* application.
*/
diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c
index 00e31b05c99..89faf515443 100644
--- a/src/mesa/main/pack.c
+++ b/src/mesa/main/pack.c
@@ -1073,6 +1073,21 @@ _mesa_pack_depth_span( struct gl_context *ctx, GLuint n, GLvoid *dest,
}
}
break;
+ case GL_UNSIGNED_INT_24_8:
+ {
+ const GLdouble scale = (GLdouble) 0xffffff;
+ GLuint *dst = (GLuint *) dest;
+ GLuint i;
+ for (i = 0; i < n; i++) {
+ GLuint z = (GLuint) (depthSpan[i] * scale);
+ assert(z <= 0xffffff);
+ dst[i] = (z << 8);
+ }
+ if (dstPacking->SwapBytes) {
+ _mesa_swap4( (GLuint *) dst, n );
+ }
+ break;
+ }
case GL_UNSIGNED_INT:
{
GLuint *dst = (GLuint *) dest;
diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp
index 6d73e3bdcf2..8182d3dcc04 100644
--- a/src/mesa/main/shader_query.cpp
+++ b/src/mesa/main/shader_query.cpp
@@ -485,8 +485,14 @@ _mesa_program_resource_array_size(struct gl_program_resource *res)
case GL_COMPUTE_SUBROUTINE_UNIFORM:
case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
- case GL_BUFFER_VARIABLE:
return RESOURCE_UNI(res)->array_elements;
+ case GL_BUFFER_VARIABLE:
+ /* Unsized arrays */
+ if (RESOURCE_UNI(res)->array_stride > 0 &&
+ RESOURCE_UNI(res)->array_elements == 0)
+ return 1;
+ else
+ return RESOURCE_UNI(res)->array_elements;
case GL_VERTEX_SUBROUTINE:
case GL_GEOMETRY_SUBROUTINE:
case GL_FRAGMENT_SUBROUTINE:
@@ -833,193 +839,6 @@ program_resource_location(struct gl_shader_program *shProg,
}
}
-static char*
-get_top_level_name(const char *name)
-{
- const char *first_dot = strchr(name, '.');
- const char *first_square_bracket = strchr(name, '[');
- int name_size = 0;
- /* From ARB_program_interface_query spec:
- *
- * "For the property TOP_LEVEL_ARRAY_SIZE, a single integer identifying the
- * number of active array elements of the top-level shader storage block
- * member containing to the active variable is written to <params>. If the
- * top-level block member is not declared as an array, the value one is
- * written to <params>. If the top-level block member is an array with no
- * declared size, the value zero is written to <params>.
- */
-
- /* The buffer variable is on top level.*/
- if (!first_square_bracket && !first_dot)
- name_size = strlen(name);
- else if ((!first_square_bracket ||
- (first_dot && first_dot < first_square_bracket)))
- name_size = first_dot - name;
- else
- name_size = first_square_bracket - name;
-
- return strndup(name, name_size);
-}
-
-static char*
-get_var_name(const char *name)
-{
- const char *first_dot = strchr(name, '.');
-
- if (!first_dot)
- return strdup(name);
-
- return strndup(first_dot+1, strlen(first_dot) - 1);
-}
-
-static GLint
-program_resource_top_level_array_size(struct gl_shader_program *shProg,
- struct gl_program_resource *res,
- const char *name)
-{
- int block_index = RESOURCE_UNI(res)->block_index;
- int array_size = -1;
- char *var_name = get_top_level_name(name);
- char *interface_name =
- get_top_level_name(shProg->UniformBlocks[block_index].Name);
-
- if (strcmp(var_name, interface_name) == 0) {
- /* Deal with instanced array of SSBOs */
- char *temp_name = get_var_name(name);
- free(var_name);
- var_name = get_top_level_name(temp_name);
- free(temp_name);
- }
-
- for (unsigned i = 0; i < shProg->NumShaders; i++) {
- if (shProg->Shaders[i] == NULL)
- continue;
-
- const gl_shader *stage = shProg->Shaders[i];
- foreach_in_list(ir_instruction, node, stage->ir) {
- ir_variable *var = node->as_variable();
- if (!var || !var->get_interface_type() ||
- var->data.mode != ir_var_shader_storage)
- continue;
-
- const glsl_type *interface = var->get_interface_type();
-
- if (strcmp(interface_name, interface->name) != 0)
- continue;
-
- for (unsigned i = 0; i < interface->length; i++) {
- const glsl_struct_field *field = &interface->fields.structure[i];
- if (strcmp(field->name, var_name) != 0)
- continue;
- /* From GL_ARB_program_interface_query spec:
- *
- * "For the property TOP_LEVEL_ARRAY_SIZE, a single integer
- * identifying the number of active array elements of the top-level
- * shader storage block member containing to the active variable is
- * written to <params>. If the top-level block member is not
- * declared as an array, the value one is written to <params>. If
- * the top-level block member is an array with no declared size,
- * the value zero is written to <params>.
- */
- if (field->type->is_unsized_array())
- array_size = 0;
- else if (field->type->is_array())
- array_size = field->type->length;
- else
- array_size = 1;
- goto found_top_level_array_size;
- }
- }
- }
-found_top_level_array_size:
- free(interface_name);
- free(var_name);
- return array_size;
-}
-
-static GLint
-program_resource_top_level_array_stride(struct gl_shader_program *shProg,
- struct gl_program_resource *res,
- const char *name)
-{
- int block_index = RESOURCE_UNI(res)->block_index;
- int array_stride = -1;
- char *var_name = get_top_level_name(name);
- char *interface_name =
- get_top_level_name(shProg->UniformBlocks[block_index].Name);
-
- if (strcmp(var_name, interface_name) == 0) {
- /* Deal with instanced array of SSBOs */
- char *temp_name = get_var_name(name);
- free(var_name);
- var_name = get_top_level_name(temp_name);
- free(temp_name);
- }
-
- for (unsigned i = 0; i < shProg->NumShaders; i++) {
- if (shProg->Shaders[i] == NULL)
- continue;
-
- const gl_shader *stage = shProg->Shaders[i];
- foreach_in_list(ir_instruction, node, stage->ir) {
- ir_variable *var = node->as_variable();
- if (!var || !var->get_interface_type() ||
- var->data.mode != ir_var_shader_storage)
- continue;
-
- const glsl_type *interface = var->get_interface_type();
-
- if (strcmp(interface_name, interface->name) != 0) {
- continue;
- }
-
- for (unsigned i = 0; i < interface->length; i++) {
- const glsl_struct_field *field = &interface->fields.structure[i];
- if (strcmp(field->name, var_name) != 0)
- continue;
- /* From GL_ARB_program_interface_query:
- *
- * "For the property TOP_LEVEL_ARRAY_STRIDE, a single integer
- * identifying the stride between array elements of the top-level
- * shader storage block member containing the active variable is
- * written to <params>. For top-level block members declared as
- * arrays, the value written is the difference, in basic machine
- * units, between the offsets of the active variable for
- * consecutive elements in the top-level array. For top-level
- * block members not declared as an array, zero is written to
- * <params>."
- */
- if (field->type->is_array()) {
- const enum glsl_matrix_layout matrix_layout =
- glsl_matrix_layout(field->matrix_layout);
- bool row_major = matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR;
- const glsl_type *array_type = field->type->fields.array;
-
- if (interface->interface_packing != GLSL_INTERFACE_PACKING_STD430) {
- if (array_type->is_record() || array_type->is_array()) {
- array_stride = array_type->std140_size(row_major);
- array_stride = glsl_align(array_stride, 16);
- } else {
- unsigned element_base_align = 0;
- element_base_align = array_type->std140_base_alignment(row_major);
- array_stride = MAX2(element_base_align, 16);
- }
- } else {
- array_stride = array_type->std430_array_stride(row_major);
- }
- } else {
- array_stride = 0;
- }
- goto found_top_level_array_size;
- }
- }
- }
-found_top_level_array_size:
- free(interface_name);
- free(var_name);
- return array_stride;
-}
-
/**
* Function implements following location queries:
* glGetUniformLocation
@@ -1133,7 +952,8 @@ get_buffer_property(struct gl_shader_program *shProg,
(*val)++;
}
return 1;
- case GL_ACTIVE_VARIABLES:
+ case GL_ACTIVE_VARIABLES: {
+ unsigned num_values = 0;
for (unsigned i = 0; i < RESOURCE_UBO(res)->NumUniforms; i++) {
const char *iname = RESOURCE_UBO(res)->Uniforms[i].IndexName;
struct gl_program_resource *uni =
@@ -1143,8 +963,10 @@ get_buffer_property(struct gl_shader_program *shProg,
continue;
*val++ =
_mesa_program_resource_index(shProg, uni);
+ num_values++;
}
- return RESOURCE_UBO(res)->NumUniforms;
+ return num_values;
+ }
}
} else if (res->Type == GL_SHADER_STORAGE_BLOCK) {
switch (prop) {
@@ -1166,7 +988,8 @@ get_buffer_property(struct gl_shader_program *shProg,
(*val)++;
}
return 1;
- case GL_ACTIVE_VARIABLES:
+ case GL_ACTIVE_VARIABLES: {
+ unsigned num_values = 0;
for (unsigned i = 0; i < RESOURCE_UBO(res)->NumUniforms; i++) {
const char *iname = RESOURCE_UBO(res)->Uniforms[i].IndexName;
struct gl_program_resource *uni =
@@ -1176,8 +999,10 @@ get_buffer_property(struct gl_shader_program *shProg,
continue;
*val++ =
_mesa_program_resource_index(shProg, uni);
+ num_values++;
}
- return RESOURCE_UBO(res)->NumUniforms;
+ return num_values;
+ }
}
} else if (res->Type == GL_ATOMIC_COUNTER_BUFFER) {
switch (prop) {
@@ -1251,8 +1076,15 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg,
switch (res->Type) {
case GL_UNIFORM:
case GL_BUFFER_VARIABLE:
+ /* Test if a buffer variable is an array or an unsized array.
+ * Unsized arrays return zero as array size.
+ */
+ if (RESOURCE_UNI(res)->is_shader_storage &&
+ RESOURCE_UNI(res)->array_stride > 0)
+ *val = RESOURCE_UNI(res)->array_elements;
+ else
*val = MAX2(RESOURCE_UNI(res)->array_elements, 1);
- return 1;
+ return 1;
case GL_PROGRAM_INPUT:
case GL_PROGRAM_OUTPUT:
*val = MAX2(_mesa_program_resource_array_size(res), 1);
@@ -1374,14 +1206,12 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg,
case GL_TOP_LEVEL_ARRAY_SIZE:
VALIDATE_TYPE(GL_BUFFER_VARIABLE);
- *val = program_resource_top_level_array_size(shProg, res,
- _mesa_program_resource_name(res));
+ *val = RESOURCE_UNI(res)->top_level_array_size;
return 1;
case GL_TOP_LEVEL_ARRAY_STRIDE:
VALIDATE_TYPE(GL_BUFFER_VARIABLE);
- *val = program_resource_top_level_array_stride(shProg, res,
- _mesa_program_resource_name(res));
+ *val = RESOURCE_UNI(res)->top_level_array_stride;
return 1;
/* GL_ARB_tessellation_shader */
diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index 9dd1054c8ee..18e463d4ccc 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -713,10 +713,10 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname,
if (!has_ubo)
break;
- for (i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
+ for (i = 0; i < shProg->NumUniformBlocks; i++) {
/* Add one for the terminating NUL character.
*/
- const GLint len = strlen(shProg->UniformBlocks[i].Name) + 1;
+ const GLint len = strlen(shProg->UniformBlocks[i]->Name) + 1;
if (len > max_len)
max_len = len;
@@ -729,11 +729,7 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname,
if (!has_ubo)
break;
- *params = 0;
- for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
- if (!shProg->UniformBlocks[i].IsShaderStorage)
- (*params)++;
- }
+ *params = shProg->NumUniformBlocks;
return;
case GL_PROGRAM_BINARY_RETRIEVABLE_HINT:
/* This enum isn't part of the OES extension for OpenGL ES 2.0. It is
diff --git a/src/mesa/main/shaderimage.c b/src/mesa/main/shaderimage.c
index bd4b7c7be3b..c4ebf4201fb 100644
--- a/src/mesa/main/shaderimage.c
+++ b/src/mesa/main/shaderimage.c
@@ -415,8 +415,8 @@ _mesa_init_image_units(struct gl_context *ctx)
ctx->ImageUnits[i] = _mesa_default_image_unit(ctx);
}
-static GLboolean
-validate_image_unit(struct gl_context *ctx, struct gl_image_unit *u)
+GLboolean
+_mesa_is_image_unit_valid(struct gl_context *ctx, struct gl_image_unit *u)
{
struct gl_texture_object *t = u->TexObj;
mesa_format tex_format;
@@ -424,7 +424,8 @@ validate_image_unit(struct gl_context *ctx, struct gl_image_unit *u)
if (!t)
return GL_FALSE;
- _mesa_test_texobj_completeness(ctx, t);
+ if (!t->_BaseComplete && !t->_MipmapComplete)
+ _mesa_test_texobj_completeness(ctx, t);
if (u->Level < t->BaseLevel ||
u->Level > t->_MaxLevel ||
@@ -473,17 +474,6 @@ validate_image_unit(struct gl_context *ctx, struct gl_image_unit *u)
return GL_TRUE;
}
-void
-_mesa_validate_image_units(struct gl_context *ctx)
-{
- unsigned i;
-
- for (i = 0; i < ctx->Const.MaxImageUnits; ++i) {
- struct gl_image_unit *u = &ctx->ImageUnits[i];
- u->_Valid = validate_image_unit(ctx, u);
- }
-}
-
static GLboolean
validate_bind_image_texture(struct gl_context *ctx, GLuint unit,
GLuint texture, GLint level, GLboolean layered,
@@ -567,7 +557,6 @@ _mesa_BindImageTexture(GLuint unit, GLuint texture, GLint level,
u->Access = access;
u->Format = format;
u->_ActualFormat = _mesa_get_shader_image_format(format);
- u->_Valid = validate_image_unit(ctx, u);
if (u->TexObj && _mesa_tex_target_is_layered(u->TexObj->Target)) {
u->Layered = layered;
@@ -703,7 +692,6 @@ _mesa_BindImageTextures(GLuint first, GLsizei count, const GLuint *textures)
u->Access = GL_READ_WRITE;
u->Format = tex_format;
u->_ActualFormat = _mesa_get_shader_image_format(tex_format);
- u->_Valid = validate_image_unit(ctx, u);
} else {
/* Unbind the texture from the unit */
_mesa_reference_texobj(&u->TexObj, NULL);
@@ -713,7 +701,6 @@ _mesa_BindImageTextures(GLuint first, GLsizei count, const GLuint *textures)
u->Access = GL_READ_ONLY;
u->Format = GL_R8;
u->_ActualFormat = MESA_FORMAT_R_UNORM8;
- u->_Valid = GL_FALSE;
}
}
diff --git a/src/mesa/main/shaderimage.h b/src/mesa/main/shaderimage.h
index bbe088a2459..94ee814a716 100644
--- a/src/mesa/main/shaderimage.h
+++ b/src/mesa/main/shaderimage.h
@@ -55,13 +55,15 @@ void
_mesa_init_image_units(struct gl_context *ctx);
/**
- * Recalculate the \c _Valid flag of a context's shader image units.
+ * Return GL_TRUE if the state of the image unit passed as argument is valid
+ * and access from the shader is allowed. Otherwise loads from this unit
+ * should return zero and stores should have no effect.
*
- * To be called when the state of any texture bound to an image unit
- * changes.
+ * The result depends on context state other than the passed image unit, part
+ * of the _NEW_TEXTURE set.
*/
-void
-_mesa_validate_image_units(struct gl_context *ctx);
+GLboolean
+_mesa_is_image_unit_valid(struct gl_context *ctx, struct gl_image_unit *u);
void GLAPIENTRY
_mesa_BindImageTexture(GLuint unit, GLuint texture, GLint level,
diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c
index 4e85fda24b4..ffc71931fec 100644
--- a/src/mesa/main/shaderobj.c
+++ b/src/mesa/main/shaderobj.c
@@ -290,8 +290,8 @@ _mesa_clear_shader_program_data(struct gl_shader_program *shProg)
ralloc_free(shProg->InfoLog);
shProg->InfoLog = ralloc_strdup(shProg, "");
- ralloc_free(shProg->UniformBlocks);
- shProg->UniformBlocks = NULL;
+ ralloc_free(shProg->BufferInterfaceBlocks);
+ shProg->BufferInterfaceBlocks = NULL;
shProg->NumBufferInterfaceBlocks = 0;
for (i = 0; i < MESA_SHADER_STAGES; i++) {
ralloc_free(shProg->UniformBlockStageIndex[i]);
diff --git a/src/mesa/main/shared.c b/src/mesa/main/shared.c
index 1acaf59f432..c37b31d1753 100644
--- a/src/mesa/main/shared.c
+++ b/src/mesa/main/shared.c
@@ -107,6 +107,11 @@ _mesa_alloc_shared_state(struct gl_context *ctx)
};
STATIC_ASSERT(ARRAY_SIZE(targets) == NUM_TEXTURE_TARGETS);
shared->DefaultTex[i] = ctx->Driver.NewTextureObject(ctx, 0, targets[i]);
+ /* Need to explicitly set/overwrite the TargetIndex field here since
+ * the call to _mesa_tex_target_to_index() in NewTextureObject() may
+ * fail if the texture target is not supported.
+ */
+ shared->DefaultTex[i]->TargetIndex = i;
}
/* sanity check */
diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c
index d3b1c72b08d..4043c4f2057 100644
--- a/src/mesa/main/state.c
+++ b/src/mesa/main/state.c
@@ -391,8 +391,12 @@ _mesa_update_state_locked( struct gl_context *ctx )
GLbitfield new_state = ctx->NewState;
GLbitfield prog_flags = _NEW_PROGRAM;
GLbitfield new_prog_state = 0x0;
+ const GLbitfield computed_states = ~(_NEW_CURRENT_ATTRIB | _NEW_LINE);
- if (new_state == _NEW_CURRENT_ATTRIB)
+ /* we can skip a bunch of state validation checks if the dirty
+ * state matches one or more bits in 'computed_states'.
+ */
+ if ((new_state & computed_states) == 0)
goto out;
if (MESA_VERBOSE & VERBOSE_STATE)
diff --git a/src/mesa/main/texcompress_bptc.c b/src/mesa/main/texcompress_bptc.c
index f0f6553a01b..26e59158007 100644
--- a/src/mesa/main/texcompress_bptc.c
+++ b/src/mesa/main/texcompress_bptc.c
@@ -30,6 +30,7 @@
#include "texcompress.h"
#include "texcompress_bptc.h"
#include "util/format_srgb.h"
+#include "util/half_float.h"
#include "texstore.h"
#include "macros.h"
#include "image.h"
diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c
index 173e43c817c..547055ecf39 100644
--- a/src/mesa/main/texobj.c
+++ b/src/mesa/main/texobj.c
@@ -286,6 +286,12 @@ _mesa_initialize_texture_object( struct gl_context *ctx,
obj->RefCount = 1;
obj->Name = name;
obj->Target = target;
+ if (target != 0) {
+ obj->TargetIndex = _mesa_tex_target_to_index(ctx, target);
+ }
+ else {
+ obj->TargetIndex = NUM_TEXTURE_TARGETS; /* invalid/error value */
+ }
obj->Priority = 1.0F;
obj->BaseLevel = 0;
obj->MaxLevel = 1000;
@@ -340,6 +346,10 @@ finish_texture_init(struct gl_context *ctx, GLenum target,
GLenum filter = GL_LINEAR;
assert(obj->Target == 0);
+ obj->Target = target;
+ obj->TargetIndex = _mesa_tex_target_to_index(ctx, target);
+ assert(obj->TargetIndex < NUM_TEXTURE_TARGETS);
+
switch (target) {
case GL_TEXTURE_2D_MULTISAMPLE:
case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
@@ -1185,46 +1195,26 @@ invalidate_tex_image_error_check(struct gl_context *ctx, GLuint texture,
return t;
}
-/**
- * Wrapper for the driver function. Need this because _mesa_new_texture_object
- * permits a target of 0 and does not initialize targetIndex.
- */
-struct gl_texture_object *
-_mesa_create_nameless_texture(struct gl_context *ctx, GLenum target)
-{
- struct gl_texture_object *texObj = NULL;
- GLint targetIndex;
-
- if (target == 0)
- return texObj;
-
- texObj = ctx->Driver.NewTextureObject(ctx, 0, target);
- targetIndex = _mesa_tex_target_to_index(ctx, texObj->Target);
- assert(targetIndex < NUM_TEXTURE_TARGETS);
- texObj->TargetIndex = targetIndex;
-
- return texObj;
-}
/**
* Helper function for glCreateTextures and glGenTextures. Need this because
* glCreateTextures should throw errors if target = 0. This is not exposed to
* the rest of Mesa to encourage Mesa internals to use nameless textures,
* which do not require expensive hash lookups.
+ * \param target either 0 or a a valid / error-checked texture target enum
*/
static void
create_textures(struct gl_context *ctx, GLenum target,
- GLsizei n, GLuint *textures, bool dsa)
+ GLsizei n, GLuint *textures, const char *caller)
{
GLuint first;
GLint i;
- const char *func = dsa ? "Create" : "Gen";
if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
- _mesa_debug(ctx, "gl%sTextures %d\n", func, n);
+ _mesa_debug(ctx, "%s %d\n", caller, n);
if (n < 0) {
- _mesa_error( ctx, GL_INVALID_VALUE, "gl%sTextures(n < 0)", func );
+ _mesa_error(ctx, GL_INVALID_VALUE, "%s(n < 0)", caller);
return;
}
@@ -1241,28 +1231,14 @@ create_textures(struct gl_context *ctx, GLenum target,
/* Allocate new, empty texture objects */
for (i = 0; i < n; i++) {
struct gl_texture_object *texObj;
- GLint targetIndex;
GLuint name = first + i;
texObj = ctx->Driver.NewTextureObject(ctx, name, target);
if (!texObj) {
mtx_unlock(&ctx->Shared->Mutex);
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "gl%sTextures", func);
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "gl%sTextures", caller);
return;
}
- /* Initialize the target index if target is non-zero. */
- if (target != 0) {
- targetIndex = _mesa_tex_target_to_index(ctx, texObj->Target);
- if (targetIndex < 0) { /* Bad Target */
- mtx_unlock(&ctx->Shared->Mutex);
- _mesa_error(ctx, GL_INVALID_ENUM, "gl%sTextures(target = %s)",
- func, _mesa_enum_to_string(texObj->Target));
- return;
- }
- assert(targetIndex < NUM_TEXTURE_TARGETS);
- texObj->TargetIndex = targetIndex;
- }
-
/* insert into hash table */
_mesa_HashInsert(ctx->Shared->TexObjects, texObj->Name, texObj);
@@ -1296,7 +1272,7 @@ void GLAPIENTRY
_mesa_GenTextures(GLsizei n, GLuint *textures)
{
GET_CURRENT_CONTEXT(ctx);
- create_textures(ctx, 0, n, textures, false);
+ create_textures(ctx, 0, n, textures, "glGenTextures");
}
/**
@@ -1329,7 +1305,7 @@ _mesa_CreateTextures(GLenum target, GLsizei n, GLuint *textures)
return;
}
- create_textures(ctx, target, n, textures, true);
+ create_textures(ctx, target, n, textures, "glCreateTextures");
}
/**
@@ -1383,8 +1359,12 @@ unbind_texobj_from_texunits(struct gl_context *ctx,
const gl_texture_index index = texObj->TargetIndex;
GLuint u;
- if (texObj->Target == 0)
+ if (texObj->Target == 0) {
+ /* texture was never bound */
return;
+ }
+
+ assert(index < NUM_TEXTURE_TARGETS);
for (u = 0; u < ctx->Texture.NumCurrentTexUsed; u++) {
struct gl_texture_unit *unit = &ctx->Texture.Unit[u];
@@ -1752,10 +1732,11 @@ _mesa_BindTexture( GLenum target, GLuint texName )
_mesa_HashInsert(ctx->Shared->TexObjects, texName, newTexObj);
mtx_unlock(&ctx->Shared->Mutex);
}
- newTexObj->Target = target;
- newTexObj->TargetIndex = targetIndex;
}
+ assert(newTexObj->Target == target);
+ assert(newTexObj->TargetIndex == targetIndex);
+
bind_texture(ctx, ctx->Texture.CurrentUnit, newTexObj);
}
@@ -1778,19 +1759,12 @@ _mesa_BindTextureUnit(GLuint unit, GLuint texture)
{
GET_CURRENT_CONTEXT(ctx);
struct gl_texture_object *texObj;
- struct gl_texture_unit *texUnit;
if (unit >= _mesa_max_tex_unit(ctx)) {
_mesa_error(ctx, GL_INVALID_VALUE, "glBindTextureUnit(unit=%u)", unit);
return;
}
- texUnit = _mesa_get_tex_unit(ctx, unit);
- assert(texUnit);
- if (!texUnit) {
- return;
- }
-
if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
_mesa_debug(ctx, "glBindTextureUnit %s %d\n",
_mesa_enum_to_string(GL_TEXTURE0+unit), (GLint) texture);
@@ -1812,7 +1786,7 @@ _mesa_BindTextureUnit(GLuint unit, GLuint texture)
/* Error checking */
if (!texObj) {
_mesa_error(ctx, GL_INVALID_OPERATION,
- "glBindTextureUnit(non-gen name)");
+ "glBindTextureUnit(non-gen name)");
return;
}
if (texObj->Target == 0) {
diff --git a/src/mesa/main/texobj.h b/src/mesa/main/texobj.h
index 690878c85fc..8421337de4d 100644
--- a/src/mesa/main/texobj.h
+++ b/src/mesa/main/texobj.h
@@ -202,9 +202,6 @@ _mesa_unlock_context_textures( struct gl_context *ctx );
extern void
_mesa_lock_context_textures( struct gl_context *ctx );
-extern struct gl_texture_object *
-_mesa_create_nameless_texture(struct gl_context *ctx, GLenum target);
-
extern void
_mesa_delete_nameless_texture(struct gl_context *ctx,
struct gl_texture_object *texObj);
diff --git a/src/mesa/main/texstate.c b/src/mesa/main/texstate.c
index 9b5928c4306..cb147fac476 100644
--- a/src/mesa/main/texstate.c
+++ b/src/mesa/main/texstate.c
@@ -34,7 +34,6 @@
#include "context.h"
#include "enums.h"
#include "macros.h"
-#include "shaderimage.h"
#include "texobj.h"
#include "teximage.h"
#include "texstate.h"
@@ -741,8 +740,6 @@ update_texture_state( struct gl_context *ctx )
if (!prog[MESA_SHADER_FRAGMENT] || !prog[MESA_SHADER_VERTEX])
update_texgen(ctx);
-
- _mesa_validate_image_units(ctx);
}
diff --git a/src/mesa/main/textureview.c b/src/mesa/main/textureview.c
index 5a3282a40c1..04b7d73da5c 100644
--- a/src/mesa/main/textureview.c
+++ b/src/mesa/main/textureview.c
@@ -681,6 +681,8 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture,
texObj->Immutable = GL_TRUE;
texObj->ImmutableLevels = origTexObj->ImmutableLevels;
texObj->Target = target;
+ texObj->TargetIndex = _mesa_tex_target_to_index(ctx, target);
+ assert(texObj->TargetIndex < NUM_TEXTURE_TARGETS);
if (ctx->Driver.TextureView != NULL &&
!ctx->Driver.TextureView(ctx, texObj, origTexObj)) {
diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp
index d48729778ae..083087d6baa 100644
--- a/src/mesa/main/uniform_query.cpp
+++ b/src/mesa/main/uniform_query.cpp
@@ -318,19 +318,12 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location,
return;
}
- if ((uni->type->base_type == GLSL_TYPE_DOUBLE &&
- returnType != GLSL_TYPE_DOUBLE) ||
- (uni->type->base_type != GLSL_TYPE_DOUBLE &&
- returnType == GLSL_TYPE_DOUBLE)) {
- _mesa_error( ctx, GL_INVALID_OPERATION,
- "glGetnUniform*vARB(incompatible uniform types)");
- return;
- }
{
unsigned elements = (uni->type->is_sampler())
? 1 : uni->type->components();
const int dmul = uni->type->base_type == GLSL_TYPE_DOUBLE ? 2 : 1;
+ const int rmul = returnType == GLSL_TYPE_DOUBLE ? 2 : 1;
/* Calculate the source base address *BEFORE* modifying elements to
* account for the size of the user's buffer.
@@ -342,7 +335,7 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location,
returnType == GLSL_TYPE_UINT || returnType == GLSL_TYPE_DOUBLE);
/* doubles have a different size than the other 3 types */
- unsigned bytes = sizeof(src[0]) * elements * dmul;
+ unsigned bytes = sizeof(src[0]) * elements * rmul;
if (bufSize < 0 || bytes > (unsigned) bufSize) {
_mesa_error( ctx, GL_INVALID_OPERATION,
"glGetnUniform*vARB(out of bounds: bufSize is %d,"
@@ -366,32 +359,57 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location,
} else {
union gl_constant_value *const dst =
(union gl_constant_value *) paramsOut;
-
/* This code could be optimized by putting the loop inside the switch
* statements. However, this is not expected to be
* performance-critical code.
*/
for (unsigned i = 0; i < elements; i++) {
+ int sidx = i * dmul;
+ int didx = i * rmul;
+
switch (returnType) {
case GLSL_TYPE_FLOAT:
switch (uni->type->base_type) {
case GLSL_TYPE_UINT:
- dst[i].f = (float) src[i].u;
+ dst[didx].f = (float) src[sidx].u;
break;
case GLSL_TYPE_INT:
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_IMAGE:
- dst[i].f = (float) src[i].i;
+ dst[didx].f = (float) src[sidx].i;
break;
case GLSL_TYPE_BOOL:
- dst[i].f = src[i].i ? 1.0f : 0.0f;
+ dst[didx].f = src[sidx].i ? 1.0f : 0.0f;
+ break;
+ case GLSL_TYPE_DOUBLE:
+ dst[didx].f = *(double *)&src[sidx].f;
+ break;
+ default:
+ assert(!"Should not get here.");
+ break;
+ }
+ break;
+ case GLSL_TYPE_DOUBLE:
+ switch (uni->type->base_type) {
+ case GLSL_TYPE_UINT:
+ *(double *)&dst[didx].f = (double) src[sidx].u;
+ break;
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_SAMPLER:
+ case GLSL_TYPE_IMAGE:
+ *(double *)&dst[didx].f = (double) src[sidx].i;
+ break;
+ case GLSL_TYPE_BOOL:
+ *(double *)&dst[didx].f = src[sidx].i ? 1.0f : 0.0f;
+ break;
+ case GLSL_TYPE_FLOAT:
+ *(double *)&dst[didx].f = (double) src[sidx].f;
break;
default:
assert(!"Should not get here.");
break;
}
break;
-
case GLSL_TYPE_INT:
case GLSL_TYPE_UINT:
switch (uni->type->base_type) {
@@ -413,10 +431,13 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location,
* a floating-point value is rounded to the
* nearest integer..."
*/
- dst[i].i = IROUND(src[i].f);
+ dst[didx].i = IROUND(src[sidx].f);
break;
case GLSL_TYPE_BOOL:
- dst[i].i = src[i].i ? 1 : 0;
+ dst[didx].i = src[sidx].i ? 1 : 0;
+ break;
+ case GLSL_TYPE_DOUBLE:
+ dst[didx].i = *(double *)&src[sidx].f;
break;
default:
assert(!"Should not get here.");
diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c
index 04cc81f9809..bc235380d97 100644
--- a/src/mesa/main/uniforms.c
+++ b/src/mesa/main/uniforms.c
@@ -1016,21 +1016,21 @@ _mesa_UniformBlockBinding(GLuint program,
return;
}
- if (shProg->UniformBlocks[uniformBlockIndex].Binding !=
+ if (shProg->BufferInterfaceBlocks[uniformBlockIndex].Binding !=
uniformBlockBinding) {
int i;
FLUSH_VERTICES(ctx, 0);
ctx->NewDriverState |= ctx->DriverFlags.NewUniformBuffer;
- shProg->UniformBlocks[uniformBlockIndex].Binding = uniformBlockBinding;
+ shProg->BufferInterfaceBlocks[uniformBlockIndex].Binding = uniformBlockBinding;
for (i = 0; i < MESA_SHADER_STAGES; i++) {
int stage_index = shProg->UniformBlockStageIndex[i][uniformBlockIndex];
if (stage_index != -1) {
struct gl_shader *sh = shProg->_LinkedShaders[i];
- sh->UniformBlocks[stage_index].Binding = uniformBlockBinding;
+ sh->BufferInterfaceBlocks[stage_index].Binding = uniformBlockBinding;
}
}
}
@@ -1069,21 +1069,21 @@ _mesa_ShaderStorageBlockBinding(GLuint program,
return;
}
- if (shProg->UniformBlocks[shaderStorageBlockIndex].Binding !=
+ if (shProg->BufferInterfaceBlocks[shaderStorageBlockIndex].Binding !=
shaderStorageBlockBinding) {
int i;
FLUSH_VERTICES(ctx, 0);
ctx->NewDriverState |= ctx->DriverFlags.NewShaderStorageBuffer;
- shProg->UniformBlocks[shaderStorageBlockIndex].Binding = shaderStorageBlockBinding;
+ shProg->BufferInterfaceBlocks[shaderStorageBlockIndex].Binding = shaderStorageBlockBinding;
for (i = 0; i < MESA_SHADER_STAGES; i++) {
int stage_index = shProg->UniformBlockStageIndex[i][shaderStorageBlockIndex];
if (stage_index != -1) {
struct gl_shader *sh = shProg->_LinkedShaders[i];
- sh->UniformBlocks[stage_index].Binding = shaderStorageBlockBinding;
+ sh->BufferInterfaceBlocks[stage_index].Binding = shaderStorageBlockBinding;
}
}
}
diff --git a/src/mesa/main/uniforms.h b/src/mesa/main/uniforms.h
index bec035cdc97..2f88b65043d 100644
--- a/src/mesa/main/uniforms.h
+++ b/src/mesa/main/uniforms.h
@@ -27,7 +27,7 @@
#define UNIFORMS_H
#include "main/glheader.h"
-#include "glsl/glsl_types.h"
+#include "glsl/nir/glsl_types.h"
#include "glsl/ir_uniform.h"
#include "program/prog_parameter.h"
diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c
index 498b2f867d0..5635a643200 100644
--- a/src/mesa/main/version.c
+++ b/src/mesa/main/version.c
@@ -24,6 +24,7 @@
#include <stdio.h>
+#include "context.h"
#include "imports.h"
#include "mtypes.h"
#include "version.h"
@@ -181,7 +182,23 @@ _mesa_override_gl_version(struct gl_context *ctx)
{
if (_mesa_override_gl_version_contextless(&ctx->Const, &ctx->API,
&ctx->Version)) {
- create_version_string(ctx, "");
+ /* We need to include API in version string for OpenGL ES, otherwise
+ * application can not detect GLES via glGetString(GL_VERSION) query.
+ *
+ * From OpenGL ES 3.2 spec, Page 436:
+ *
+ * "The VERSION string is laid out as follows:
+ *
+ * OpenGL ES N.M vendor-specific information"
+ *
+ * From OpenGL 4.5 spec, Page 538:
+ *
+ * "The VERSION and SHADING_LANGUAGE_VERSION strings are laid out as
+ * follows:
+ *
+ * <version number><space><vendor-specific information>"
+ */
+ create_version_string(ctx, _mesa_is_gles(ctx) ? "OpenGL ES " : "");
}
}
diff --git a/src/mesa/program/Android.mk b/src/mesa/program/Android.mk
index ccb0fa5f32b..cc67f8aeadd 100644
--- a/src/mesa/program/Android.mk
+++ b/src/mesa/program/Android.mk
@@ -75,6 +75,7 @@ LOCAL_C_INCLUDES := \
$(MESA_TOP)/src/mapi \
$(MESA_TOP)/src/mesa \
$(MESA_TOP)/src/glsl \
+ $(MESA_TOP)/src/glsl/nir \
$(MESA_TOP)/src/gallium/auxiliary \
$(MESA_TOP)/src/gallium/include
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 0214b8e684c..1099d79d834 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -42,7 +42,7 @@
#include "glsl/ir_optimization.h"
#include "glsl/ir_uniform.h"
#include "glsl/glsl_parser_extras.h"
-#include "glsl/glsl_types.h"
+#include "glsl/nir/glsl_types.h"
#include "glsl/linker.h"
#include "glsl/program.h"
#include "program/hash_table.h"
diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c
index fc00534028f..539e3c05312 100644
--- a/src/mesa/program/prog_to_nir.c
+++ b/src/mesa/program/prog_to_nir.c
@@ -923,7 +923,7 @@ ptn_add_output_stores(struct ptn_compile *c)
{
nir_builder *b = &c->build;
- foreach_list_typed(nir_variable, var, node, &b->shader->outputs) {
+ nir_foreach_variable(var, &b->shader->outputs) {
nir_intrinsic_instr *store =
nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var);
store->num_components = glsl_get_vector_elements(var->type);
@@ -958,11 +958,10 @@ setup_registers_and_variables(struct ptn_compile *c)
for (int i = 0; i < num_inputs; i++) {
if (!(c->prog->InputsRead & BITFIELD64_BIT(i)))
continue;
- nir_variable *var = rzalloc(shader, nir_variable);
- var->type = glsl_vec4_type();
- var->data.read_only = true;
- var->data.mode = nir_var_shader_in;
- var->name = ralloc_asprintf(var, "in_%d", i);
+
+ nir_variable *var =
+ nir_variable_create(shader, nir_var_shader_in, glsl_vec4_type(),
+ ralloc_asprintf(shader, "in_%d", i));
var->data.location = i;
var->data.index = 0;
@@ -992,12 +991,9 @@ setup_registers_and_variables(struct ptn_compile *c)
nir_ssa_def *f001 = nir_vec4(b, &load_x->dest.ssa, nir_imm_float(b, 0.0),
nir_imm_float(b, 0.0), nir_imm_float(b, 1.0));
- nir_variable *fullvar = rzalloc(shader, nir_variable);
- fullvar->type = glsl_vec4_type();
- fullvar->data.mode = nir_var_local;
- fullvar->name = "fogcoord_tmp";
- exec_list_push_tail(&b->impl->locals, &fullvar->node);
-
+ nir_variable *fullvar =
+ nir_local_variable_create(b->impl, glsl_vec4_type(),
+ "fogcoord_tmp");
nir_intrinsic_instr *store =
nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
store->num_components = 4;
@@ -1005,17 +1001,15 @@ setup_registers_and_variables(struct ptn_compile *c)
store->src[0] = nir_src_for_ssa(f001);
nir_builder_instr_insert(b, &store->instr);
- /* Insert the real input into the list so the driver has real
- * inputs, but set c->input_vars[i] to the temporary so we use
+ /* We inserted the real input into the list so the driver has real
+ * inputs, but we set c->input_vars[i] to the temporary so we use
* the splatted value.
*/
- exec_list_push_tail(&shader->inputs, &var->node);
c->input_vars[i] = fullvar;
continue;
}
}
- exec_list_push_tail(&shader->inputs, &var->node);
c->input_vars[i] = var;
}
@@ -1135,6 +1129,12 @@ prog_to_nir(const struct gl_program *prog,
s->info.uses_clip_distance_out = false;
s->info.separate_shader = false;
+ if (stage == MESA_SHADER_FRAGMENT) {
+ struct gl_fragment_program *fp = (struct gl_fragment_program *)prog;
+
+ s->info.fs.uses_discard = fp->UsesKill;
+ }
+
fail:
if (c->error) {
ralloc_free(s);
diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c
index e94c1021258..0e78e6ab25d 100644
--- a/src/mesa/program/program.c
+++ b/src/mesa/program/program.c
@@ -173,57 +173,15 @@ _mesa_set_program_error(struct gl_context *ctx, GLint pos, const char *string)
/**
- * Find the line number and column for 'pos' within 'string'.
- * Return a copy of the line which contains 'pos'. Free the line with
- * free().
- * \param string the program string
- * \param pos the position within the string
- * \param line returns the line number corresponding to 'pos'.
- * \param col returns the column number corresponding to 'pos'.
- * \return copy of the line containing 'pos'.
- */
-const GLubyte *
-_mesa_find_line_column(const GLubyte *string, const GLubyte *pos,
- GLint *line, GLint *col)
-{
- const GLubyte *lineStart = string;
- const GLubyte *p = string;
- GLubyte *s;
- int len;
-
- *line = 1;
-
- while (p != pos) {
- if (*p == (GLubyte) '\n') {
- (*line)++;
- lineStart = p + 1;
- }
- p++;
- }
-
- *col = (pos - lineStart) + 1;
-
- /* return copy of this line */
- while (*p != 0 && *p != '\n')
- p++;
- len = p - lineStart;
- s = malloc(len + 1);
- memcpy(s, lineStart, len);
- s[len] = 0;
-
- return s;
-}
-
-
-/**
* Initialize a new gl_program object.
*/
-static void
-init_program_struct(struct gl_program *prog, GLenum target, GLuint id)
+struct gl_program *
+_mesa_init_gl_program(struct gl_program *prog, GLenum target, GLuint id)
{
GLuint i;
- assert(prog);
+ if (!prog)
+ return NULL;
memset(prog, 0, sizeof(*prog));
mtx_init(&prog->Mutex, mtx_plain);
@@ -235,102 +193,8 @@ init_program_struct(struct gl_program *prog, GLenum target, GLuint id)
/* default mapping from samplers to texture units */
for (i = 0; i < MAX_SAMPLERS; i++)
prog->SamplerUnits[i] = i;
-}
-
-
-/**
- * Initialize a new fragment program object.
- */
-struct gl_program *
-_mesa_init_fragment_program(struct gl_context *ctx,
- struct gl_fragment_program *prog,
- GLenum target, GLuint id)
-{
- if (prog) {
- init_program_struct(&prog->Base, target, id);
- return &prog->Base;
- }
- return NULL;
-}
-
-
-/**
- * Initialize a new vertex program object.
- */
-struct gl_program *
-_mesa_init_vertex_program(struct gl_context *ctx,
- struct gl_vertex_program *prog,
- GLenum target, GLuint id)
-{
- if (prog) {
- init_program_struct(&prog->Base, target, id);
- return &prog->Base;
- }
- return NULL;
-}
-
-
-/**
- * Initialize a new compute program object.
- */
-struct gl_program *
-_mesa_init_compute_program(struct gl_context *ctx,
- struct gl_compute_program *prog,
- GLenum target, GLuint id)
-{
- if (prog) {
- init_program_struct(&prog->Base, target, id);
- return &prog->Base;
- }
- return NULL;
-}
-
-
-/**
- * Initialize a new tessellation control program object.
- */
-struct gl_program *
-_mesa_init_tess_ctrl_program(struct gl_context *ctx,
- struct gl_tess_ctrl_program *prog,
- GLenum target, GLuint id)
-{
- if (prog) {
- init_program_struct(&prog->Base, target, id);
- return &prog->Base;
- }
- return NULL;
-}
-
-/**
- * Initialize a new tessellation evaluation program object.
- */
-struct gl_program *
-_mesa_init_tess_eval_program(struct gl_context *ctx,
- struct gl_tess_eval_program *prog,
- GLenum target, GLuint id)
-{
- if (prog) {
- init_program_struct(&prog->Base, target, id);
- return &prog->Base;
- }
- return NULL;
-}
-
-
-/**
- * Initialize a new geometry program object.
- */
-struct gl_program *
-_mesa_init_geometry_program(struct gl_context *ctx,
- struct gl_geometry_program *prog,
- GLenum target, GLuint id)
-{
- if (prog) {
- init_program_struct(&prog->Base, target, id);
- return &prog->Base;
- }
- return NULL;
+ return prog;
}
@@ -349,43 +213,36 @@ _mesa_init_geometry_program(struct gl_context *ctx,
struct gl_program *
_mesa_new_program(struct gl_context *ctx, GLenum target, GLuint id)
{
- struct gl_program *prog;
switch (target) {
- case GL_VERTEX_PROGRAM_ARB: /* == GL_VERTEX_PROGRAM_NV */
- prog = _mesa_init_vertex_program(ctx, CALLOC_STRUCT(gl_vertex_program),
- target, id );
- break;
+ case GL_VERTEX_PROGRAM_ARB: { /* == GL_VERTEX_PROGRAM_NV */
+ struct gl_vertex_program *prog = CALLOC_STRUCT(gl_vertex_program);
+ return _mesa_init_gl_program(&prog->Base, target, id);
+ }
case GL_FRAGMENT_PROGRAM_NV:
- case GL_FRAGMENT_PROGRAM_ARB:
- prog =_mesa_init_fragment_program(ctx,
- CALLOC_STRUCT(gl_fragment_program),
- target, id );
- break;
- case GL_GEOMETRY_PROGRAM_NV:
- prog = _mesa_init_geometry_program(ctx,
- CALLOC_STRUCT(gl_geometry_program),
- target, id);
- break;
- case GL_TESS_CONTROL_PROGRAM_NV:
- prog = _mesa_init_tess_ctrl_program(ctx,
- CALLOC_STRUCT(gl_tess_ctrl_program),
- target, id);
- break;
- case GL_TESS_EVALUATION_PROGRAM_NV:
- prog = _mesa_init_tess_eval_program(ctx,
- CALLOC_STRUCT(gl_tess_eval_program),
- target, id);
- break;
- case GL_COMPUTE_PROGRAM_NV:
- prog = _mesa_init_compute_program(ctx,
- CALLOC_STRUCT(gl_compute_program),
- target, id);
- break;
+ case GL_FRAGMENT_PROGRAM_ARB: {
+ struct gl_fragment_program *prog = CALLOC_STRUCT(gl_fragment_program);
+ return _mesa_init_gl_program(&prog->Base, target, id);
+ }
+ case GL_GEOMETRY_PROGRAM_NV: {
+ struct gl_geometry_program *prog = CALLOC_STRUCT(gl_geometry_program);
+ return _mesa_init_gl_program(&prog->Base, target, id);
+ }
+ case GL_TESS_CONTROL_PROGRAM_NV: {
+ struct gl_tess_ctrl_program *prog = CALLOC_STRUCT(gl_tess_ctrl_program);
+ return _mesa_init_gl_program(&prog->Base, target, id);
+ }
+ case GL_TESS_EVALUATION_PROGRAM_NV: {
+ struct gl_tess_eval_program *prog = CALLOC_STRUCT(gl_tess_eval_program);
+ return _mesa_init_gl_program(&prog->Base, target, id);
+ }
+ case GL_COMPUTE_PROGRAM_NV: {
+ struct gl_compute_program *prog = CALLOC_STRUCT(gl_compute_program);
+ return _mesa_init_gl_program(&prog->Base, target, id);
+ }
default:
_mesa_problem(ctx, "bad target in _mesa_new_program");
- prog = NULL;
+ return NULL;
}
- return prog;
}
@@ -494,123 +351,6 @@ _mesa_reference_program_(struct gl_context *ctx,
/**
- * Return a copy of a program.
- * XXX Problem here if the program object is actually OO-derivation
- * made by a device driver.
- */
-struct gl_program *
-_mesa_clone_program(struct gl_context *ctx, const struct gl_program *prog)
-{
- struct gl_program *clone;
-
- clone = ctx->Driver.NewProgram(ctx, prog->Target, prog->Id);
- if (!clone)
- return NULL;
-
- assert(clone->Target == prog->Target);
- assert(clone->RefCount == 1);
-
- clone->String = (GLubyte *) strdup((char *) prog->String);
- clone->Format = prog->Format;
- clone->Instructions = _mesa_alloc_instructions(prog->NumInstructions);
- if (!clone->Instructions) {
- _mesa_reference_program(ctx, &clone, NULL);
- return NULL;
- }
- _mesa_copy_instructions(clone->Instructions, prog->Instructions,
- prog->NumInstructions);
- clone->InputsRead = prog->InputsRead;
- clone->OutputsWritten = prog->OutputsWritten;
- clone->SamplersUsed = prog->SamplersUsed;
- clone->ShadowSamplers = prog->ShadowSamplers;
- memcpy(clone->TexturesUsed, prog->TexturesUsed, sizeof(prog->TexturesUsed));
-
- if (prog->Parameters)
- clone->Parameters = _mesa_clone_parameter_list(prog->Parameters);
- if (prog->LocalParams) {
- clone->LocalParams = malloc(MAX_PROGRAM_LOCAL_PARAMS *
- sizeof(float[4]));
- if (!clone->LocalParams) {
- _mesa_reference_program(ctx, &clone, NULL);
- return NULL;
- }
- memcpy(clone->LocalParams, prog->LocalParams,
- MAX_PROGRAM_LOCAL_PARAMS * sizeof(float[4]));
- }
- clone->IndirectRegisterFiles = prog->IndirectRegisterFiles;
- clone->NumInstructions = prog->NumInstructions;
- clone->NumTemporaries = prog->NumTemporaries;
- clone->NumParameters = prog->NumParameters;
- clone->NumAttributes = prog->NumAttributes;
- clone->NumAddressRegs = prog->NumAddressRegs;
- clone->NumNativeInstructions = prog->NumNativeInstructions;
- clone->NumNativeTemporaries = prog->NumNativeTemporaries;
- clone->NumNativeParameters = prog->NumNativeParameters;
- clone->NumNativeAttributes = prog->NumNativeAttributes;
- clone->NumNativeAddressRegs = prog->NumNativeAddressRegs;
- clone->NumAluInstructions = prog->NumAluInstructions;
- clone->NumTexInstructions = prog->NumTexInstructions;
- clone->NumTexIndirections = prog->NumTexIndirections;
- clone->NumNativeAluInstructions = prog->NumNativeAluInstructions;
- clone->NumNativeTexInstructions = prog->NumNativeTexInstructions;
- clone->NumNativeTexIndirections = prog->NumNativeTexIndirections;
-
- switch (prog->Target) {
- case GL_VERTEX_PROGRAM_ARB:
- {
- const struct gl_vertex_program *vp = gl_vertex_program_const(prog);
- struct gl_vertex_program *vpc = gl_vertex_program(clone);
- vpc->IsPositionInvariant = vp->IsPositionInvariant;
- }
- break;
- case GL_FRAGMENT_PROGRAM_ARB:
- {
- const struct gl_fragment_program *fp = gl_fragment_program_const(prog);
- struct gl_fragment_program *fpc = gl_fragment_program(clone);
- fpc->UsesKill = fp->UsesKill;
- fpc->UsesDFdy = fp->UsesDFdy;
- fpc->OriginUpperLeft = fp->OriginUpperLeft;
- fpc->PixelCenterInteger = fp->PixelCenterInteger;
- }
- break;
- case GL_GEOMETRY_PROGRAM_NV:
- {
- const struct gl_geometry_program *gp = gl_geometry_program_const(prog);
- struct gl_geometry_program *gpc = gl_geometry_program(clone);
- gpc->VerticesOut = gp->VerticesOut;
- gpc->InputType = gp->InputType;
- gpc->Invocations = gp->Invocations;
- gpc->OutputType = gp->OutputType;
- gpc->UsesEndPrimitive = gp->UsesEndPrimitive;
- gpc->UsesStreams = gp->UsesStreams;
- }
- break;
- case GL_TESS_CONTROL_PROGRAM_NV:
- {
- const struct gl_tess_ctrl_program *tcp = gl_tess_ctrl_program_const(prog);
- struct gl_tess_ctrl_program *tcpc = gl_tess_ctrl_program(clone);
- tcpc->VerticesOut = tcp->VerticesOut;
- }
- break;
- case GL_TESS_EVALUATION_PROGRAM_NV:
- {
- const struct gl_tess_eval_program *tep = gl_tess_eval_program_const(prog);
- struct gl_tess_eval_program *tepc = gl_tess_eval_program(clone);
- tepc->PrimitiveMode = tep->PrimitiveMode;
- tepc->Spacing = tep->Spacing;
- tepc->VertexOrder = tep->VertexOrder;
- tepc->PointMode = tep->PointMode;
- }
- break;
- default:
- _mesa_problem(NULL, "Unexpected target in _mesa_clone_program");
- }
-
- return clone;
-}
-
-
-/**
* Insert 'count' NOP instructions at 'start' in the given program.
* Adjust branch targets accordingly.
*/
@@ -707,190 +447,6 @@ _mesa_delete_instructions(struct gl_program *prog, GLuint start, GLuint count)
/**
- * Search instructions for registers that match (oldFile, oldIndex),
- * replacing them with (newFile, newIndex).
- */
-static void
-replace_registers(struct prog_instruction *inst, GLuint numInst,
- GLuint oldFile, GLuint oldIndex,
- GLuint newFile, GLuint newIndex)
-{
- GLuint i, j;
- for (i = 0; i < numInst; i++) {
- /* src regs */
- for (j = 0; j < _mesa_num_inst_src_regs(inst[i].Opcode); j++) {
- if (inst[i].SrcReg[j].File == oldFile &&
- inst[i].SrcReg[j].Index == oldIndex) {
- inst[i].SrcReg[j].File = newFile;
- inst[i].SrcReg[j].Index = newIndex;
- }
- }
- /* dst reg */
- if (inst[i].DstReg.File == oldFile && inst[i].DstReg.Index == oldIndex) {
- inst[i].DstReg.File = newFile;
- inst[i].DstReg.Index = newIndex;
- }
- }
-}
-
-
-/**
- * Search instructions for references to program parameters. When found,
- * increment the parameter index by 'offset'.
- * Used when combining programs.
- */
-static void
-adjust_param_indexes(struct prog_instruction *inst, GLuint numInst,
- GLuint offset)
-{
- GLuint i, j;
- for (i = 0; i < numInst; i++) {
- for (j = 0; j < _mesa_num_inst_src_regs(inst[i].Opcode); j++) {
- GLuint f = inst[i].SrcReg[j].File;
- if (f == PROGRAM_CONSTANT ||
- f == PROGRAM_UNIFORM ||
- f == PROGRAM_STATE_VAR) {
- inst[i].SrcReg[j].Index += offset;
- }
- }
- }
-}
-
-
-/**
- * Combine two programs into one. Fix instructions so the outputs of
- * the first program go to the inputs of the second program.
- */
-struct gl_program *
-_mesa_combine_programs(struct gl_context *ctx,
- const struct gl_program *progA,
- const struct gl_program *progB)
-{
- struct prog_instruction *newInst;
- struct gl_program *newProg;
- const GLuint lenA = progA->NumInstructions - 1; /* omit END instr */
- const GLuint lenB = progB->NumInstructions;
- const GLuint numParamsA = _mesa_num_parameters(progA->Parameters);
- const GLuint newLength = lenA + lenB;
- GLboolean usedTemps[MAX_PROGRAM_TEMPS];
- GLuint firstTemp = 0;
- GLbitfield64 inputsB;
- GLuint i;
-
- assert(progA->Target == progB->Target);
-
- newInst = _mesa_alloc_instructions(newLength);
- if (!newInst)
- return GL_FALSE;
-
- _mesa_copy_instructions(newInst, progA->Instructions, lenA);
- _mesa_copy_instructions(newInst + lenA, progB->Instructions, lenB);
-
- /* adjust branch / instruction addresses for B's instructions */
- for (i = 0; i < lenB; i++) {
- newInst[lenA + i].BranchTarget += lenA;
- }
-
- newProg = ctx->Driver.NewProgram(ctx, progA->Target, 0);
- newProg->Instructions = newInst;
- newProg->NumInstructions = newLength;
-
- /* find used temp regs (we may need new temps below) */
- _mesa_find_used_registers(newProg, PROGRAM_TEMPORARY,
- usedTemps, MAX_PROGRAM_TEMPS);
-
- if (newProg->Target == GL_FRAGMENT_PROGRAM_ARB) {
- const struct gl_fragment_program *fprogA, *fprogB;
- struct gl_fragment_program *newFprog;
- GLbitfield64 progB_inputsRead = progB->InputsRead;
- GLint progB_colorFile, progB_colorIndex;
-
- fprogA = gl_fragment_program_const(progA);
- fprogB = gl_fragment_program_const(progB);
- newFprog = gl_fragment_program(newProg);
-
- newFprog->UsesKill = fprogA->UsesKill || fprogB->UsesKill;
- newFprog->UsesDFdy = fprogA->UsesDFdy || fprogB->UsesDFdy;
-
- /* We'll do a search and replace for instances
- * of progB_colorFile/progB_colorIndex below...
- */
- progB_colorFile = PROGRAM_INPUT;
- progB_colorIndex = VARYING_SLOT_COL0;
-
- /*
- * The fragment program may get color from a state var rather than
- * a fragment input (vertex output) if it's constant.
- * See the texenvprogram.c code.
- * So, search the program's parameter list now to see if the program
- * gets color from a state var instead of a conventional fragment
- * input register.
- */
- for (i = 0; i < progB->Parameters->NumParameters; i++) {
- struct gl_program_parameter *p = &progB->Parameters->Parameters[i];
- if (p->Type == PROGRAM_STATE_VAR &&
- p->StateIndexes[0] == STATE_INTERNAL &&
- p->StateIndexes[1] == STATE_CURRENT_ATTRIB &&
- (int) p->StateIndexes[2] == (int) VERT_ATTRIB_COLOR0) {
- progB_inputsRead |= VARYING_BIT_COL0;
- progB_colorFile = PROGRAM_STATE_VAR;
- progB_colorIndex = i;
- break;
- }
- }
-
- /* Connect color outputs of fprogA to color inputs of fprogB, via a
- * new temporary register.
- */
- if ((progA->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_COLOR)) &&
- (progB_inputsRead & VARYING_BIT_COL0)) {
- GLint tempReg = _mesa_find_free_register(usedTemps, MAX_PROGRAM_TEMPS,
- firstTemp);
- if (tempReg < 0) {
- _mesa_problem(ctx, "No free temp regs found in "
- "_mesa_combine_programs(), using 31");
- tempReg = 31;
- }
- firstTemp = tempReg + 1;
-
- /* replace writes to result.color[0] with tempReg */
- replace_registers(newInst, lenA,
- PROGRAM_OUTPUT, FRAG_RESULT_COLOR,
- PROGRAM_TEMPORARY, tempReg);
- /* replace reads from the input color with tempReg */
- replace_registers(newInst + lenA, lenB,
- progB_colorFile, progB_colorIndex, /* search for */
- PROGRAM_TEMPORARY, tempReg /* replace with */ );
- }
-
- /* compute combined program's InputsRead */
- inputsB = progB_inputsRead;
- if (progA->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_COLOR)) {
- inputsB &= ~(1 << VARYING_SLOT_COL0);
- }
- newProg->InputsRead = progA->InputsRead | inputsB;
- newProg->OutputsWritten = progB->OutputsWritten;
- newProg->SamplersUsed = progA->SamplersUsed | progB->SamplersUsed;
- }
- else {
- /* vertex program */
- assert(0); /* XXX todo */
- }
-
- /*
- * Merge parameters (uniforms, constants, etc)
- */
- newProg->Parameters = _mesa_combine_parameter_lists(progA->Parameters,
- progB->Parameters);
-
- adjust_param_indexes(newInst + lenA, lenB, numParamsA);
-
-
- return newProg;
-}
-
-
-/**
* Populate the 'used' array with flags indicating which registers (TEMPs,
* INPUTs, OUTPUTs, etc, are used by the given program.
* \param file type of register to scan for
@@ -952,140 +508,6 @@ _mesa_find_free_register(const GLboolean used[],
}
-
-/**
- * Check if the given register index is valid (doesn't exceed implementation-
- * dependent limits).
- * \return GL_TRUE if OK, GL_FALSE if bad index
- */
-GLboolean
-_mesa_valid_register_index(const struct gl_context *ctx,
- gl_shader_stage shaderType,
- gl_register_file file, GLint index)
-{
- const struct gl_program_constants *c;
-
- assert(0 <= shaderType && shaderType < MESA_SHADER_STAGES);
- c = &ctx->Const.Program[shaderType];
-
- switch (file) {
- case PROGRAM_UNDEFINED:
- return GL_TRUE; /* XXX or maybe false? */
-
- case PROGRAM_TEMPORARY:
- return index >= 0 && index < (GLint) c->MaxTemps;
-
- case PROGRAM_UNIFORM:
- case PROGRAM_STATE_VAR:
- /* aka constant buffer */
- return index >= 0 && index < (GLint) c->MaxUniformComponents / 4;
-
- case PROGRAM_CONSTANT:
- /* constant buffer w/ possible relative negative addressing */
- return (index > (int) c->MaxUniformComponents / -4 &&
- index < (int) c->MaxUniformComponents / 4);
-
- case PROGRAM_INPUT:
- if (index < 0)
- return GL_FALSE;
-
- switch (shaderType) {
- case MESA_SHADER_VERTEX:
- return index < VERT_ATTRIB_GENERIC0 + (GLint) c->MaxAttribs;
- case MESA_SHADER_FRAGMENT:
- return index < VARYING_SLOT_VAR0 + (GLint) ctx->Const.MaxVarying;
- case MESA_SHADER_GEOMETRY:
- return index < VARYING_SLOT_VAR0 + (GLint) ctx->Const.MaxVarying;
- default:
- return GL_FALSE;
- }
-
- case PROGRAM_OUTPUT:
- if (index < 0)
- return GL_FALSE;
-
- switch (shaderType) {
- case MESA_SHADER_VERTEX:
- return index < VARYING_SLOT_VAR0 + (GLint) ctx->Const.MaxVarying;
- case MESA_SHADER_FRAGMENT:
- return index < FRAG_RESULT_DATA0 + (GLint) ctx->Const.MaxDrawBuffers;
- case MESA_SHADER_GEOMETRY:
- return index < VARYING_SLOT_VAR0 + (GLint) ctx->Const.MaxVarying;
- default:
- return GL_FALSE;
- }
-
- case PROGRAM_ADDRESS:
- return index >= 0 && index < (GLint) c->MaxAddressRegs;
-
- default:
- _mesa_problem(ctx,
- "unexpected register file in _mesa_valid_register_index()");
- return GL_FALSE;
- }
-}
-
-
-
-/**
- * "Post-process" a GPU program. This is intended to be used for debugging.
- * Example actions include no-op'ing instructions or changing instruction
- * behaviour.
- */
-void
-_mesa_postprocess_program(struct gl_context *ctx, struct gl_program *prog)
-{
- static const GLfloat white[4] = { 0.5, 0.5, 0.5, 0.5 };
- GLuint i;
- GLuint whiteSwizzle;
- GLint whiteIndex = _mesa_add_unnamed_constant(prog->Parameters,
- (gl_constant_value *) white,
- 4, &whiteSwizzle);
-
- (void) whiteIndex;
-
- for (i = 0; i < prog->NumInstructions; i++) {
- struct prog_instruction *inst = prog->Instructions + i;
- const GLuint n = _mesa_num_inst_src_regs(inst->Opcode);
-
- (void) n;
-
- if (_mesa_is_tex_instruction(inst->Opcode)) {
-#if 0
- /* replace TEX/TXP/TXB with MOV */
- inst->Opcode = OPCODE_MOV;
- inst->DstReg.WriteMask = WRITEMASK_XYZW;
- inst->SrcReg[0].Swizzle = SWIZZLE_XYZW;
- inst->SrcReg[0].Negate = NEGATE_NONE;
-#endif
-
-#if 0
- /* disable shadow texture mode */
- inst->TexShadow = 0;
-#endif
- }
-
- if (inst->Opcode == OPCODE_TXP) {
-#if 0
- inst->Opcode = OPCODE_MOV;
- inst->DstReg.WriteMask = WRITEMASK_XYZW;
- inst->SrcReg[0].File = PROGRAM_CONSTANT;
- inst->SrcReg[0].Index = whiteIndex;
- inst->SrcReg[0].Swizzle = SWIZZLE_XYZW;
- inst->SrcReg[0].Negate = NEGATE_NONE;
-#endif
-#if 0
- inst->TexShadow = 0;
-#endif
-#if 0
- inst->Opcode = OPCODE_TEX;
- inst->TexShadow = 0;
-#endif
- }
-
- }
-}
-
/* Gets the minimum number of shader invocations per fragment.
* This function is useful to determine if we need to do per
* sample shading or per fragment shading.
diff --git a/src/mesa/program/program.h b/src/mesa/program/program.h
index a894147cafd..24e05974dc3 100644
--- a/src/mesa/program/program.h
+++ b/src/mesa/program/program.h
@@ -63,40 +63,8 @@ _mesa_update_default_objects_program(struct gl_context *ctx);
extern void
_mesa_set_program_error(struct gl_context *ctx, GLint pos, const char *string);
-extern const GLubyte *
-_mesa_find_line_column(const GLubyte *string, const GLubyte *pos,
- GLint *line, GLint *col);
-
-
-extern struct gl_program *
-_mesa_init_vertex_program(struct gl_context *ctx,
- struct gl_vertex_program *prog,
- GLenum target, GLuint id);
-
-extern struct gl_program *
-_mesa_init_fragment_program(struct gl_context *ctx,
- struct gl_fragment_program *prog,
- GLenum target, GLuint id);
-
extern struct gl_program *
-_mesa_init_tess_ctrl_program(struct gl_context *ctx,
- struct gl_tess_ctrl_program *prog,
- GLenum target, GLuint id);
-
-extern struct gl_program *
-_mesa_init_tess_eval_program(struct gl_context *ctx,
- struct gl_tess_eval_program *prog,
- GLenum target, GLuint id);
-
-extern struct gl_program *
-_mesa_init_geometry_program(struct gl_context *ctx,
- struct gl_geometry_program *prog,
- GLenum target, GLuint id);
-
-extern struct gl_program *
-_mesa_init_compute_program(struct gl_context *ctx,
- struct gl_compute_program *prog,
- GLenum target, GLuint id);
+_mesa_init_gl_program(struct gl_program *prog, GLenum target, GLuint id);
extern struct gl_program *
_mesa_new_program(struct gl_context *ctx, GLenum target, GLuint id);
@@ -176,56 +144,12 @@ _mesa_reference_tesseprog(struct gl_context *ctx,
(struct gl_program *) prog);
}
-extern struct gl_program *
-_mesa_clone_program(struct gl_context *ctx, const struct gl_program *prog);
-
-static inline struct gl_vertex_program *
-_mesa_clone_vertex_program(struct gl_context *ctx,
- const struct gl_vertex_program *prog)
-{
- return (struct gl_vertex_program *) _mesa_clone_program(ctx, &prog->Base);
-}
-
-static inline struct gl_tess_ctrl_program *
-_mesa_clone_tess_ctrl_program(struct gl_context *ctx,
- const struct gl_tess_ctrl_program *prog)
-{
- return (struct gl_tess_ctrl_program *) _mesa_clone_program(ctx, &prog->Base);
-}
-
-static inline struct gl_tess_eval_program *
-_mesa_clone_tess_eval_program(struct gl_context *ctx,
- const struct gl_tess_eval_program *prog)
-{
- return (struct gl_tess_eval_program *) _mesa_clone_program(ctx, &prog->Base);
-}
-
-static inline struct gl_geometry_program *
-_mesa_clone_geometry_program(struct gl_context *ctx,
- const struct gl_geometry_program *prog)
-{
- return (struct gl_geometry_program *) _mesa_clone_program(ctx, &prog->Base);
-}
-
-static inline struct gl_fragment_program *
-_mesa_clone_fragment_program(struct gl_context *ctx,
- const struct gl_fragment_program *prog)
-{
- return (struct gl_fragment_program *) _mesa_clone_program(ctx, &prog->Base);
-}
-
-
extern GLboolean
_mesa_insert_instructions(struct gl_program *prog, GLuint start, GLuint count);
extern GLboolean
_mesa_delete_instructions(struct gl_program *prog, GLuint start, GLuint count);
-extern struct gl_program *
-_mesa_combine_programs(struct gl_context *ctx,
- const struct gl_program *progA,
- const struct gl_program *progB);
-
extern void
_mesa_find_used_registers(const struct gl_program *prog,
gl_register_file file,
@@ -235,15 +159,6 @@ extern GLint
_mesa_find_free_register(const GLboolean used[],
GLuint maxRegs, GLuint firstReg);
-
-extern GLboolean
-_mesa_valid_register_index(const struct gl_context *ctx,
- gl_shader_stage shaderType,
- gl_register_file file, GLint index);
-
-extern void
-_mesa_postprocess_program(struct gl_context *ctx, struct gl_program *prog);
-
extern GLint
_mesa_get_min_invocations_per_fragment(struct gl_context *ctx,
const struct gl_fragment_program *prog,
diff --git a/src/mesa/program/sampler.cpp b/src/mesa/program/sampler.cpp
index 1198a3c45f1..84e2504baba 100644
--- a/src/mesa/program/sampler.cpp
+++ b/src/mesa/program/sampler.cpp
@@ -24,7 +24,7 @@
*/
#include "main/mtypes.h"
-#include "glsl/glsl_types.h"
+#include "glsl/nir/glsl_types.h"
#include "glsl/ir.h"
#include "glsl/ir_uniform.h"
#include "glsl/ir_visitor.h"
diff --git a/src/mesa/state_tracker/st_atom_clip.c b/src/mesa/state_tracker/st_atom_clip.c
index 506a770499f..b820d843385 100644
--- a/src/mesa/state_tracker/st_atom_clip.c
+++ b/src/mesa/state_tracker/st_atom_clip.c
@@ -56,6 +56,9 @@ static void update_clip( struct st_context *st )
use_eye = TRUE;
}
+ /* _ClipUserPlane = _NEW_TRANSFORM | _NEW_PROJECTION
+ * EyeUserPlane = _NEW_TRANSFORM
+ */
memcpy(clip.ucp,
use_eye ? ctx->Transform.EyeUserPlane
: ctx->Transform._ClipUserPlane, sizeof(clip.ucp));
@@ -70,7 +73,7 @@ static void update_clip( struct st_context *st )
const struct st_tracked_state st_update_clip = {
"st_update_clip", /* name */
{ /* dirty */
- _NEW_TRANSFORM, /* mesa */
+ _NEW_TRANSFORM | _NEW_PROJECTION, /* mesa */
ST_NEW_VERTEX_PROGRAM, /* st */
},
update_clip /* update */
diff --git a/src/mesa/state_tracker/st_atom_constbuf.c b/src/mesa/state_tracker/st_atom_constbuf.c
index 6affb4d84d5..acaa85d9356 100644
--- a/src/mesa/state_tracker/st_atom_constbuf.c
+++ b/src/mesa/state_tracker/st_atom_constbuf.c
@@ -238,7 +238,7 @@ static void st_bind_ubos(struct st_context *st,
struct gl_uniform_buffer_binding *binding;
struct st_buffer_object *st_obj;
- binding = &st->ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
+ binding = &st->ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding];
st_obj = st_buffer_object(binding->BufferObject);
cb.buffer = st_obj->buffer;
diff --git a/src/mesa/state_tracker/st_atom_pixeltransfer.c b/src/mesa/state_tracker/st_atom_pixeltransfer.c
index a04163cc137..f94c358afba 100644
--- a/src/mesa/state_tracker/st_atom_pixeltransfer.c
+++ b/src/mesa/state_tracker/st_atom_pixeltransfer.c
@@ -25,65 +25,17 @@
*
**************************************************************************/
-/*
- * Generate fragment programs to implement pixel transfer ops, such as
- * scale/bias, colortable, convolution...
- *
- * Authors:
+/* Authors:
* Brian Paul
*/
-#include "main/imports.h"
-#include "main/image.h"
-#include "main/macros.h"
-#include "program/program.h"
-#include "program/prog_cache.h"
-#include "program/prog_instruction.h"
-#include "program/prog_parameter.h"
-#include "program/prog_print.h"
-
#include "st_context.h"
-#include "st_format.h"
#include "st_texture.h"
-#include "pipe/p_screen.h"
-#include "pipe/p_context.h"
#include "util/u_inlines.h"
#include "util/u_pack_color.h"
-struct state_key
-{
- GLuint scaleAndBias:1;
- GLuint pixelMaps:1;
-
-#if 0
- GLfloat Maps[3][256][4];
- int NumMaps;
- GLint NumStages;
- pipeline_stage Stages[STAGE_MAX];
- GLboolean StagesUsed[STAGE_MAX];
- GLfloat Scale1[4], Bias1[4];
- GLfloat Scale2[4], Bias2[4];
-#endif
-};
-
-static void
-make_state_key(struct gl_context *ctx, struct state_key *key)
-{
- memset(key, 0, sizeof(*key));
-
- if (ctx->Pixel.RedBias != 0.0 || ctx->Pixel.RedScale != 1.0 ||
- ctx->Pixel.GreenBias != 0.0 || ctx->Pixel.GreenScale != 1.0 ||
- ctx->Pixel.BlueBias != 0.0 || ctx->Pixel.BlueScale != 1.0 ||
- ctx->Pixel.AlphaBias != 0.0 || ctx->Pixel.AlphaScale != 1.0) {
- key->scaleAndBias = 1;
- }
-
- key->pixelMaps = ctx->Pixel.MapColorFlag;
-}
-
-
/**
* Update the pixelmap texture with the contents of the R/G/B/A pixel maps.
*/
@@ -128,74 +80,15 @@ load_color_map_texture(struct gl_context *ctx, struct pipe_resource *pt)
pipe_transfer_unmap(pipe, transfer);
}
-
-
-#define MAX_INST 100
-
/**
- * Returns a fragment program which implements the current pixel transfer ops.
+ * Upload the pixel transfer color map texture.
*/
-static struct gl_fragment_program *
-get_pixel_transfer_program(struct gl_context *ctx, const struct state_key *key)
+static void
+update_pixel_transfer(struct st_context *st)
{
- struct st_context *st = st_context(ctx);
- struct prog_instruction inst[MAX_INST];
- struct gl_program_parameter_list *params;
- struct gl_fragment_program *fp;
- GLuint ic = 0;
- const GLuint colorTemp = 0;
-
- fp = (struct gl_fragment_program *)
- ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
- if (!fp)
- return NULL;
-
- params = _mesa_new_parameter_list();
-
- /*
- * Get initial pixel color from the texture.
- * TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
- */
- _mesa_init_instructions(inst + ic, 1);
- inst[ic].Opcode = OPCODE_TEX;
- inst[ic].DstReg.File = PROGRAM_TEMPORARY;
- inst[ic].DstReg.Index = colorTemp;
- inst[ic].SrcReg[0].File = PROGRAM_INPUT;
- inst[ic].SrcReg[0].Index = VARYING_SLOT_TEX0;
- inst[ic].TexSrcUnit = 0;
- inst[ic].TexSrcTarget = TEXTURE_2D_INDEX;
- ic++;
- fp->Base.InputsRead = BITFIELD64_BIT(VARYING_SLOT_TEX0);
- fp->Base.OutputsWritten = BITFIELD64_BIT(FRAG_RESULT_COLOR);
- fp->Base.SamplersUsed = 0x1; /* sampler 0 (bit 0) is used */
-
- if (key->scaleAndBias) {
- static const gl_state_index scale_state[STATE_LENGTH] =
- { STATE_INTERNAL, STATE_PT_SCALE, 0, 0, 0 };
- static const gl_state_index bias_state[STATE_LENGTH] =
- { STATE_INTERNAL, STATE_PT_BIAS, 0, 0, 0 };
- GLint scale_p, bias_p;
-
- scale_p = _mesa_add_state_reference(params, scale_state);
- bias_p = _mesa_add_state_reference(params, bias_state);
-
- /* MAD colorTemp, colorTemp, scale, bias; */
- _mesa_init_instructions(inst + ic, 1);
- inst[ic].Opcode = OPCODE_MAD;
- inst[ic].DstReg.File = PROGRAM_TEMPORARY;
- inst[ic].DstReg.Index = colorTemp;
- inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
- inst[ic].SrcReg[0].Index = colorTemp;
- inst[ic].SrcReg[1].File = PROGRAM_STATE_VAR;
- inst[ic].SrcReg[1].Index = scale_p;
- inst[ic].SrcReg[2].File = PROGRAM_STATE_VAR;
- inst[ic].SrcReg[2].Index = bias_p;
- ic++;
- }
-
- if (key->pixelMaps) {
- const GLuint temp = 1;
+ struct gl_context *ctx = st->ctx;
+ if (ctx->Pixel.MapColorFlag) {
/* create the colormap/texture now if not already done */
if (!st->pixel_xfer.pixelmap_texture) {
st->pixel_xfer.pixelmap_texture = st_create_color_map_texture(ctx);
@@ -203,117 +96,11 @@ get_pixel_transfer_program(struct gl_context *ctx, const struct state_key *key)
st_create_texture_sampler_view(st->pipe,
st->pixel_xfer.pixelmap_texture);
}
-
- /* with a little effort, we can do four pixel map look-ups with
- * two TEX instructions:
- */
-
- /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */
- _mesa_init_instructions(inst + ic, 1);
- inst[ic].Opcode = OPCODE_TEX;
- inst[ic].DstReg.File = PROGRAM_TEMPORARY;
- inst[ic].DstReg.Index = temp;
- inst[ic].DstReg.WriteMask = WRITEMASK_XY; /* write R,G */
- inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
- inst[ic].SrcReg[0].Index = colorTemp;
- inst[ic].TexSrcUnit = 1;
- inst[ic].TexSrcTarget = TEXTURE_2D_INDEX;
- ic++;
-
- /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */
- _mesa_init_instructions(inst + ic, 1);
- inst[ic].Opcode = OPCODE_TEX;
- inst[ic].DstReg.File = PROGRAM_TEMPORARY;
- inst[ic].DstReg.Index = temp;
- inst[ic].DstReg.WriteMask = WRITEMASK_ZW; /* write B,A */
- inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
- inst[ic].SrcReg[0].Index = colorTemp;
- inst[ic].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W,
- SWIZZLE_Z, SWIZZLE_W);
- inst[ic].TexSrcUnit = 1;
- inst[ic].TexSrcTarget = TEXTURE_2D_INDEX;
- ic++;
-
- /* MOV colorTemp, temp; */
- _mesa_init_instructions(inst + ic, 1);
- inst[ic].Opcode = OPCODE_MOV;
- inst[ic].DstReg.File = PROGRAM_TEMPORARY;
- inst[ic].DstReg.Index = colorTemp;
- inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
- inst[ic].SrcReg[0].Index = temp;
- ic++;
-
- fp->Base.SamplersUsed |= (1 << 1); /* sampler 1 is used */
- }
-
- /* Modify last instruction's dst reg to write to result.color */
- {
- struct prog_instruction *last = &inst[ic - 1];
- last->DstReg.File = PROGRAM_OUTPUT;
- last->DstReg.Index = FRAG_RESULT_COLOR;
- }
-
- /* END; */
- _mesa_init_instructions(inst + ic, 1);
- inst[ic].Opcode = OPCODE_END;
- ic++;
-
- assert(ic <= MAX_INST);
-
-
- fp->Base.Instructions = _mesa_alloc_instructions(ic);
- if (!fp->Base.Instructions) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY,
- "generating pixel transfer program");
- _mesa_free_parameter_list(params);
- return NULL;
- }
-
- _mesa_copy_instructions(fp->Base.Instructions, inst, ic);
- fp->Base.NumInstructions = ic;
- fp->Base.Parameters = params;
-
-#if 0
- printf("========= pixel transfer prog\n");
- _mesa_print_program(&fp->Base);
- _mesa_print_parameter_list(fp->Base.Parameters);
-#endif
-
- return fp;
-}
-
-
-
-/**
- * Update st->pixel_xfer.program in response to new pixel-transfer state.
- */
-static void
-update_pixel_transfer(struct st_context *st)
-{
- struct gl_context *ctx = st->ctx;
- struct state_key key;
- struct gl_fragment_program *fp;
-
- make_state_key(st->ctx, &key);
-
- fp = (struct gl_fragment_program *)
- _mesa_search_program_cache(st->pixel_xfer.cache, &key, sizeof(key));
- if (!fp) {
- fp = get_pixel_transfer_program(st->ctx, &key);
- _mesa_program_cache_insert(st->ctx, st->pixel_xfer.cache,
- &key, sizeof(key), &fp->Base);
- }
-
- if (ctx->Pixel.MapColorFlag) {
load_color_map_texture(ctx, st->pixel_xfer.pixelmap_texture);
}
- st->pixel_xfer.pixelmap_enabled = ctx->Pixel.MapColorFlag;
-
- st->pixel_xfer.program = (struct st_fragment_program *) fp;
}
-
const struct st_tracked_state st_update_pixel_transfer = {
"st_update_pixel_transfer", /* name */
{ /* dirty */
diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c
index 230eba8c4a5..bb6dfe85644 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/src/mesa/state_tracker/st_cb_bitmap.c
@@ -108,151 +108,6 @@ struct bitmap_cache
/**
- * Make fragment program for glBitmap:
- * Sample the texture and kill the fragment if the bit is 0.
- * This program will be combined with the user's fragment program.
- */
-static struct st_fragment_program *
-make_bitmap_fragment_program(struct gl_context *ctx, GLuint samplerIndex)
-{
- struct st_context *st = st_context(ctx);
- struct st_fragment_program *stfp;
- struct gl_program *p;
- GLuint ic = 0;
-
- p = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
- if (!p)
- return NULL;
-
- p->NumInstructions = 3;
-
- p->Instructions = _mesa_alloc_instructions(p->NumInstructions);
- if (!p->Instructions) {
- ctx->Driver.DeleteProgram(ctx, p);
- return NULL;
- }
- _mesa_init_instructions(p->Instructions, p->NumInstructions);
-
- /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
- p->Instructions[ic].Opcode = OPCODE_TEX;
- p->Instructions[ic].DstReg.File = PROGRAM_TEMPORARY;
- p->Instructions[ic].DstReg.Index = 0;
- p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT;
- p->Instructions[ic].SrcReg[0].Index = VARYING_SLOT_TEX0;
- p->Instructions[ic].TexSrcUnit = samplerIndex;
- p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX;
- ic++;
-
- /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */
- p->Instructions[ic].Opcode = OPCODE_KIL;
- p->Instructions[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
-
- if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
- p->Instructions[ic].SrcReg[0].Swizzle = SWIZZLE_XXXX;
-
- p->Instructions[ic].SrcReg[0].Index = 0;
- p->Instructions[ic].SrcReg[0].Negate = NEGATE_XYZW;
- ic++;
-
- /* END; */
- p->Instructions[ic++].Opcode = OPCODE_END;
-
- assert(ic == p->NumInstructions);
-
- p->InputsRead = VARYING_BIT_TEX0;
- p->OutputsWritten = 0x0;
- p->SamplersUsed = (1 << samplerIndex);
-
- stfp = (struct st_fragment_program *) p;
- stfp->Base.UsesKill = GL_TRUE;
-
- return stfp;
-}
-
-
-static struct gl_program *
-make_bitmap_fragment_program_glsl(struct st_context *st,
- struct st_fragment_program *orig,
- GLuint samplerIndex)
-{
- struct gl_context *ctx = st->ctx;
- struct st_fragment_program *fp = (struct st_fragment_program *)
- ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
-
- if (!fp)
- return NULL;
-
- get_bitmap_visitor(fp, orig->glsl_to_tgsi, samplerIndex);
- return &fp->Base.Base;
-}
-
-
-static int
-find_free_bit(uint bitfield)
-{
- int i;
- for (i = 0; i < 32; i++) {
- if ((bitfield & (1 << i)) == 0) {
- return i;
- }
- }
- return -1;
-}
-
-
-/**
- * Combine basic bitmap fragment program with the user-defined program.
- * \param st current context
- * \param fpIn the incoming fragment program
- * \param fpOut the new fragment program which does fragment culling
- * \param bitmap_sampler sampler number for the bitmap texture
- */
-void
-st_make_bitmap_fragment_program(struct st_context *st,
- struct gl_fragment_program *fpIn,
- struct gl_fragment_program **fpOut,
- GLuint *bitmap_sampler)
-{
- struct st_fragment_program *bitmap_prog;
- struct st_fragment_program *stfpIn = (struct st_fragment_program *) fpIn;
- struct gl_program *newProg;
- uint sampler;
-
- /*
- * Generate new program which is the user-defined program prefixed
- * with the bitmap sampler/kill instructions.
- */
- sampler = find_free_bit(fpIn->Base.SamplersUsed);
-
- if (stfpIn->glsl_to_tgsi)
- newProg = make_bitmap_fragment_program_glsl(st, stfpIn, sampler);
- else {
- bitmap_prog = make_bitmap_fragment_program(st->ctx, sampler);
-
- newProg = _mesa_combine_programs(st->ctx,
- &bitmap_prog->Base.Base,
- &fpIn->Base);
- /* done with this after combining */
- st_reference_fragprog(st, &bitmap_prog, NULL);
- }
-
-#if 0
- {
- printf("Combined bitmap program:\n");
- _mesa_print_program(newProg);
- printf("InputsRead: 0x%x\n", newProg->InputsRead);
- printf("OutputsWritten: 0x%x\n", newProg->OutputsWritten);
- _mesa_print_parameter_list(newProg->Parameters);
- }
-#endif
-
- /* return results */
- *fpOut = (struct gl_fragment_program *) newProg;
- *bitmap_sampler = sampler;
-}
-
-
-/**
* Copy user-provide bitmap bits into texture buffer, expanding
* bits into texels.
* "On" bits will set texels to 0x0.
diff --git a/src/mesa/state_tracker/st_cb_bitmap.h b/src/mesa/state_tracker/st_cb_bitmap.h
index b4254ca1eeb..dc7e5cb5c9e 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.h
+++ b/src/mesa/state_tracker/st_cb_bitmap.h
@@ -31,6 +31,7 @@
#include "main/compiler.h"
+#include <stdbool.h>
struct dd_function_table;
struct st_context;
@@ -47,13 +48,11 @@ extern void
st_destroy_bitmap(struct st_context *st);
extern void
-st_make_bitmap_fragment_program(struct st_context *st,
- struct gl_fragment_program *fpIn,
- struct gl_fragment_program **fpOut,
- GLuint *bitmap_sampler);
-
-extern void
st_flush_bitmap_cache(struct st_context *st);
+extern const struct tgsi_token *
+st_get_bitmap_shader(const struct tgsi_token *tokens,
+ unsigned sampler_index,
+ bool use_texcoord, bool swizzle_xxxx);
#endif /* ST_CB_BITMAP_H */
diff --git a/src/mesa/state_tracker/st_cb_bitmap_shader.c b/src/mesa/state_tracker/st_cb_bitmap_shader.c
new file mode 100644
index 00000000000..cddea36d4f6
--- /dev/null
+++ b/src/mesa/state_tracker/st_cb_bitmap_shader.c
@@ -0,0 +1,174 @@
+/**************************************************************************
+ *
+ * Copyright (C) 2015 Advanced Micro Devices, Inc.
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "st_cb_bitmap.h"
+#include "tgsi/tgsi_transform.h"
+#include "tgsi/tgsi_scan.h"
+#include "tgsi/tgsi_dump.h"
+#include "util/u_debug.h"
+
+struct tgsi_bitmap_transform {
+ struct tgsi_transform_context base;
+ struct tgsi_shader_info info;
+ unsigned sampler_index;
+ bool use_texcoord;
+ bool swizzle_xxxx;
+ bool first_instruction_emitted;
+};
+
+static inline struct tgsi_bitmap_transform *
+tgsi_bitmap_transform(struct tgsi_transform_context *tctx)
+{
+ return (struct tgsi_bitmap_transform *)tctx;
+}
+
+static void
+transform_instr(struct tgsi_transform_context *tctx,
+ struct tgsi_full_instruction *current_inst)
+{
+ struct tgsi_bitmap_transform *ctx = tgsi_bitmap_transform(tctx);
+ struct tgsi_full_declaration decl;
+ struct tgsi_full_instruction inst;
+ unsigned i, semantic;
+ int texcoord_index = -1;
+
+ if (ctx->first_instruction_emitted) {
+ tctx->emit_instruction(tctx, current_inst);
+ return;
+ }
+
+ ctx->first_instruction_emitted = true;
+
+ /* Add TEMP[0] if it's missing. */
+ if (ctx->info.file_max[TGSI_FILE_TEMPORARY] == -1) {
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_TEMPORARY;
+ tctx->emit_declaration(tctx, &decl);
+ }
+
+ /* Add TEXCOORD[0] if it's missing. */
+ semantic = ctx->use_texcoord ? TGSI_SEMANTIC_TEXCOORD :
+ TGSI_SEMANTIC_GENERIC;
+ for (i = 0; i < ctx->info.num_inputs; i++) {
+ if (ctx->info.input_semantic_name[i] == semantic &&
+ ctx->info.input_semantic_index[i] == 0) {
+ texcoord_index = i;
+ break;
+ }
+ }
+
+ if (texcoord_index == -1) {
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_INPUT;
+ decl.Declaration.Semantic = 1;
+ decl.Semantic.Name = semantic;
+ decl.Declaration.Interpolate = 1;
+ decl.Interp.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
+ decl.Range.First = decl.Range.Last = ctx->info.num_inputs;
+ texcoord_index = ctx->info.num_inputs;
+ tctx->emit_declaration(tctx, &decl);
+ }
+
+ /* Declare the sampler. */
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_SAMPLER;
+ decl.Range.First = decl.Range.Last = ctx->sampler_index;
+ tctx->emit_declaration(tctx, &decl);
+
+ /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
+ inst = tgsi_default_full_instruction();
+ inst.Instruction.Opcode = TGSI_OPCODE_TEX;
+ inst.Instruction.Texture = 1;
+ inst.Texture.Texture = TGSI_TEXTURE_2D;
+
+ inst.Instruction.NumDstRegs = 1;
+ inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+ inst.Dst[0].Register.Index = 0;
+ inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
+
+ inst.Instruction.NumSrcRegs = 2;
+ inst.Src[0].Register.File = TGSI_FILE_INPUT;
+ inst.Src[0].Register.Index = texcoord_index;
+ inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
+ inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_Y;
+ inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_Z;
+ inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_W;
+ inst.Src[1].Register.File = TGSI_FILE_SAMPLER;
+ inst.Src[1].Register.Index = ctx->sampler_index;
+
+ tctx->emit_instruction(tctx, &inst);
+
+   /* KIL if -tmp0 < 0 # texel=0 -> keep / texel!=0 -> discard */
+ inst = tgsi_default_full_instruction();
+ inst.Instruction.Opcode = TGSI_OPCODE_KILL_IF;
+ inst.Instruction.NumDstRegs = 0;
+ inst.Instruction.NumSrcRegs = 1;
+
+ inst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
+ inst.Src[0].Register.Index = 0;
+ inst.Src[0].Register.Negate = 1;
+ inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
+ if (ctx->swizzle_xxxx) {
+ inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
+ inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
+ inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
+ } else {
+ inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_Y;
+ inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_Z;
+ inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_W;
+ }
+ tctx->emit_instruction(tctx, &inst);
+
+ /* And emit the instruction we got. */
+ tctx->emit_instruction(tctx, current_inst);
+}
+
+const struct tgsi_token *
+st_get_bitmap_shader(const struct tgsi_token *tokens,
+ unsigned sampler_index,
+ bool use_texcoord, bool swizzle_xxxx)
+{
+ struct tgsi_bitmap_transform ctx;
+ struct tgsi_token *newtoks;
+ int newlen;
+
+ memset(&ctx, 0, sizeof(ctx));
+ ctx.base.transform_instruction = transform_instr;
+ ctx.sampler_index = sampler_index;
+ ctx.use_texcoord = use_texcoord;
+ ctx.swizzle_xxxx = swizzle_xxxx;
+ tgsi_scan_shader(tokens, &ctx.info);
+
+ newlen = tgsi_num_tokens(tokens) + 20;
+ newtoks = tgsi_alloc_tokens(newlen);
+ if (!newtoks)
+ return NULL;
+
+ tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
+ return newtoks;
+}
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
index 152160e1dd2..7e8633edc1a 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -72,217 +72,74 @@
/**
- * Check if the given program is:
- * 0: MOVE result.color, fragment.color;
- * 1: END;
- */
-static GLboolean
-is_passthrough_program(const struct gl_fragment_program *prog)
-{
- if (prog->Base.NumInstructions == 2) {
- const struct prog_instruction *inst = prog->Base.Instructions;
- if (inst[0].Opcode == OPCODE_MOV &&
- inst[1].Opcode == OPCODE_END &&
- inst[0].DstReg.File == PROGRAM_OUTPUT &&
- inst[0].DstReg.Index == FRAG_RESULT_COLOR &&
- inst[0].DstReg.WriteMask == WRITEMASK_XYZW &&
- inst[0].SrcReg[0].File == PROGRAM_INPUT &&
- inst[0].SrcReg[0].Index == VARYING_SLOT_COL0 &&
- inst[0].SrcReg[0].Swizzle == SWIZZLE_XYZW) {
- return GL_TRUE;
- }
- }
- return GL_FALSE;
-}
-
-
-/**
- * Returns a fragment program which implements the current pixel transfer ops.
- */
-static struct gl_fragment_program *
-get_glsl_pixel_transfer_program(struct st_context *st,
- struct st_fragment_program *orig)
-{
- int pixelMaps = 0, scaleAndBias = 0;
- struct gl_context *ctx = st->ctx;
- struct st_fragment_program *fp = (struct st_fragment_program *)
- ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
-
- if (!fp)
- return NULL;
-
- if (ctx->Pixel.RedBias != 0.0 || ctx->Pixel.RedScale != 1.0 ||
- ctx->Pixel.GreenBias != 0.0 || ctx->Pixel.GreenScale != 1.0 ||
- ctx->Pixel.BlueBias != 0.0 || ctx->Pixel.BlueScale != 1.0 ||
- ctx->Pixel.AlphaBias != 0.0 || ctx->Pixel.AlphaScale != 1.0) {
- scaleAndBias = 1;
- }
-
- pixelMaps = ctx->Pixel.MapColorFlag;
-
- if (pixelMaps) {
- /* create the colormap/texture now if not already done */
- if (!st->pixel_xfer.pixelmap_texture) {
- st->pixel_xfer.pixelmap_texture = st_create_color_map_texture(ctx);
- st->pixel_xfer.pixelmap_sampler_view =
- st_create_texture_sampler_view(st->pipe,
- st->pixel_xfer.pixelmap_texture);
- }
- }
-
- get_pixel_transfer_visitor(fp, orig->glsl_to_tgsi,
- scaleAndBias, pixelMaps);
-
- return &fp->Base;
-}
-
-
-/**
- * Make fragment shader for glDraw/CopyPixels. This shader is made
- * by combining the pixel transfer shader with the user-defined shader.
- * \param fpIn the current/incoming fragment program
- * \param fpOut returns the combined fragment program
- */
-void
-st_make_drawpix_fragment_program(struct st_context *st,
- struct gl_fragment_program *fpIn,
- struct gl_fragment_program **fpOut)
-{
- struct gl_program *newProg;
- struct st_fragment_program *stfp = (struct st_fragment_program *) fpIn;
-
- if (is_passthrough_program(fpIn)) {
- newProg = (struct gl_program *) _mesa_clone_fragment_program(st->ctx,
- &st->pixel_xfer.program->Base);
- }
- else if (stfp->glsl_to_tgsi != NULL) {
- newProg = (struct gl_program *) get_glsl_pixel_transfer_program(st, stfp);
- }
- else {
-#if 0
- /* debug */
- printf("Base program:\n");
- _mesa_print_program(&fpIn->Base);
- printf("DrawPix program:\n");
- _mesa_print_program(&st->pixel_xfer.program->Base.Base);
-#endif
- newProg = _mesa_combine_programs(st->ctx,
- &st->pixel_xfer.program->Base.Base,
- &fpIn->Base);
- }
-
-#if 0
- /* debug */
- printf("Combined DrawPixels program:\n");
- _mesa_print_program(newProg);
- printf("InputsRead: 0x%x\n", newProg->InputsRead);
- printf("OutputsWritten: 0x%x\n", newProg->OutputsWritten);
- _mesa_print_parameter_list(newProg->Parameters);
-#endif
-
- *fpOut = (struct gl_fragment_program *) newProg;
-}
-
-
-/**
* Create fragment program that does a TEX() instruction to get a Z and/or
* stencil value value, then writes to FRAG_RESULT_DEPTH/FRAG_RESULT_STENCIL.
* Used for glDrawPixels(GL_DEPTH_COMPONENT / GL_STENCIL_INDEX).
* Pass fragment color through as-is.
- * \return pointer to the gl_fragment program
+ *
+ * \return CSO of the fragment shader.
*/
-struct gl_fragment_program *
-st_make_drawpix_z_stencil_program(struct st_context *st,
- GLboolean write_depth,
- GLboolean write_stencil)
+static void *
+get_drawpix_z_stencil_program(struct st_context *st,
+ GLboolean write_depth,
+ GLboolean write_stencil)
{
- struct gl_context *ctx = st->ctx;
- struct gl_program *p;
- struct gl_fragment_program *fp;
- GLuint ic = 0;
+ struct ureg_program *ureg;
+ struct ureg_src depth_sampler, stencil_sampler;
+ struct ureg_src texcoord, color;
+ struct ureg_dst out_color, out_depth, out_stencil;
const GLuint shaderIndex = write_depth * 2 + write_stencil;
+ void *cso;
- assert(shaderIndex < ARRAY_SIZE(st->drawpix.shaders));
+ assert(shaderIndex < ARRAY_SIZE(st->drawpix.zs_shaders));
- if (st->drawpix.shaders[shaderIndex]) {
+ if (st->drawpix.zs_shaders[shaderIndex]) {
/* already have the proper shader */
- return st->drawpix.shaders[shaderIndex];
+ return st->drawpix.zs_shaders[shaderIndex];
}
- /*
- * Create shader now
- */
- p = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
- if (!p)
+ ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+ if (ureg == NULL)
return NULL;
- p->NumInstructions = write_depth ? 3 : 1;
- p->NumInstructions += write_stencil ? 1 : 0;
-
- p->Instructions = _mesa_alloc_instructions(p->NumInstructions);
- if (!p->Instructions) {
- ctx->Driver.DeleteProgram(ctx, p);
- return NULL;
- }
- _mesa_init_instructions(p->Instructions, p->NumInstructions);
+ ureg_property(ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, TRUE);
if (write_depth) {
- /* TEX result.depth, fragment.texcoord[0], texture[0], 2D; */
- p->Instructions[ic].Opcode = OPCODE_TEX;
- p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT;
- p->Instructions[ic].DstReg.Index = FRAG_RESULT_DEPTH;
- p->Instructions[ic].DstReg.WriteMask = WRITEMASK_Z;
- p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT;
- p->Instructions[ic].SrcReg[0].Index = VARYING_SLOT_TEX0;
- p->Instructions[ic].TexSrcUnit = 0;
- p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX;
- ic++;
- /* MOV result.color, fragment.color; */
- p->Instructions[ic].Opcode = OPCODE_MOV;
- p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT;
- p->Instructions[ic].DstReg.Index = FRAG_RESULT_COLOR;
- p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT;
- p->Instructions[ic].SrcReg[0].Index = VARYING_SLOT_COL0;
- ic++;
+ color = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 0,
+ TGSI_INTERPOLATE_COLOR);
+ out_color = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
+
+ depth_sampler = ureg_DECL_sampler(ureg, 0);
+ out_depth = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
}
if (write_stencil) {
- /* TEX result.stencil, fragment.texcoord[0], texture[0], 2D; */
- p->Instructions[ic].Opcode = OPCODE_TEX;
- p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT;
- p->Instructions[ic].DstReg.Index = FRAG_RESULT_STENCIL;
- p->Instructions[ic].DstReg.WriteMask = WRITEMASK_Y;
- p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT;
- p->Instructions[ic].SrcReg[0].Index = VARYING_SLOT_TEX0;
- p->Instructions[ic].TexSrcUnit = 1;
- p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX;
- ic++;
+ stencil_sampler = ureg_DECL_sampler(ureg, 1);
+ out_stencil = ureg_DECL_output(ureg, TGSI_SEMANTIC_STENCIL, 0);
}
- /* END; */
- p->Instructions[ic++].Opcode = OPCODE_END;
-
- assert(ic == p->NumInstructions);
+ texcoord = ureg_DECL_fs_input(ureg,
+ st->needs_texcoord_semantic ?
+ TGSI_SEMANTIC_TEXCOORD :
+ TGSI_SEMANTIC_GENERIC,
+ 0, TGSI_INTERPOLATE_LINEAR);
- p->InputsRead = VARYING_BIT_TEX0 | VARYING_BIT_COL0;
- p->OutputsWritten = 0;
if (write_depth) {
- p->OutputsWritten |= BITFIELD64_BIT(FRAG_RESULT_DEPTH);
- p->OutputsWritten |= BITFIELD64_BIT(FRAG_RESULT_COLOR);
+ ureg_TEX(ureg, ureg_writemask(out_depth, TGSI_WRITEMASK_Z),
+ TGSI_TEXTURE_2D, texcoord, depth_sampler);
+ ureg_MOV(ureg, out_color, color);
}
- if (write_stencil)
- p->OutputsWritten |= BITFIELD64_BIT(FRAG_RESULT_STENCIL);
- p->SamplersUsed = 0x1; /* sampler 0 (bit 0) is used */
if (write_stencil)
- p->SamplersUsed |= 1 << 1;
+ ureg_TEX(ureg, ureg_writemask(out_stencil, TGSI_WRITEMASK_Y),
+ TGSI_TEXTURE_2D, texcoord, stencil_sampler);
- fp = (struct gl_fragment_program *) p;
+ ureg_END(ureg);
+ cso = ureg_create_shader_and_destroy(ureg, st->pipe);
/* save the new shader */
- st->drawpix.shaders[shaderIndex] = fp;
-
- return fp;
+ st->drawpix.zs_shaders[shaderIndex] = cso;
+ return cso;
}
@@ -668,6 +525,7 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
int num_sampler_view,
void *driver_vp,
void *driver_fp,
+ struct st_fp_variant *fpv,
const GLfloat *color,
GLboolean invertTex,
GLboolean write_depth, GLboolean write_stencil)
@@ -755,10 +613,9 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
cso_set_tesseval_shader_handle(cso, NULL);
cso_set_geometry_shader_handle(cso, NULL);
- /* texture sampling state: */
+ /* user samplers, plus the drawpix samplers */
{
struct pipe_sampler_state sampler;
- const struct pipe_sampler_state *states[2] = {&sampler, &sampler};
memset(&sampler, 0, sizeof(sampler));
sampler.wrap_s = PIPE_TEX_WRAP_CLAMP;
@@ -769,8 +626,25 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
sampler.normalized_coords = normalized;
- cso_set_samplers(cso, PIPE_SHADER_FRAGMENT,
- num_sampler_view > 1 ? 2 : 1, states);
+ if (fpv) {
+ const struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS];
+ uint num = MAX2(MAX2(fpv->drawpix_sampler, fpv->pixelmap_sampler) + 1,
+ st->state.num_samplers[PIPE_SHADER_FRAGMENT]);
+ uint i;
+
+ for (i = 0; i < st->state.num_samplers[PIPE_SHADER_FRAGMENT]; i++)
+ samplers[i] = &st->state.samplers[PIPE_SHADER_FRAGMENT][i];
+
+ samplers[fpv->drawpix_sampler] = &sampler;
+ if (sv[1])
+ samplers[fpv->pixelmap_sampler] = &sampler;
+
+ cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, num, samplers);
+ } else {
+ const struct pipe_sampler_state *samplers[2] = {&sampler, &sampler};
+
+ cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, num_sampler_view, samplers);
+ }
}
/* viewport state: viewport matching window dims */
@@ -790,8 +664,21 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
cso_set_vertex_elements(cso, 3, st->velems_util_draw);
cso_set_stream_outputs(st->cso_context, 0, NULL, NULL);
- /* texture state: */
- cso_set_sampler_views(cso, PIPE_SHADER_FRAGMENT, num_sampler_view, sv);
+ /* user textures, plus the drawpix textures */
+ if (fpv) {
+ struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS];
+ uint num = MAX2(MAX2(fpv->drawpix_sampler, fpv->pixelmap_sampler) + 1,
+ st->state.num_sampler_views[PIPE_SHADER_FRAGMENT]);
+
+ memcpy(sampler_views, st->state.sampler_views[PIPE_SHADER_FRAGMENT],
+ sizeof(sampler_views));
+
+ sampler_views[fpv->drawpix_sampler] = sv[0];
+ if (sv[1])
+ sampler_views[fpv->pixelmap_sampler] = sv[1];
+ cso_set_sampler_views(cso, PIPE_SHADER_FRAGMENT, num, sampler_views);
+ } else
+ cso_set_sampler_views(cso, PIPE_SHADER_FRAGMENT, num_sampler_view, sv);
/* Compute Gallium window coords (y=0=top) with pixel zoom.
* Recall that these coords are transformed by the current
@@ -1048,30 +935,6 @@ get_color_fp_variant(struct st_context *st)
/**
- * Get fragment program variant for a glDrawPixels or glCopyPixels
- * command for depth/stencil data.
- */
-static struct st_fp_variant *
-get_depth_stencil_fp_variant(struct st_context *st, GLboolean write_depth,
- GLboolean write_stencil)
-{
- struct st_fp_variant_key key;
- struct st_fp_variant *fpv;
-
- memset(&key, 0, sizeof(key));
-
- key.st = st;
- key.drawpixels = 1;
- key.drawpixels_z = write_depth;
- key.drawpixels_stencil = write_stencil;
-
- fpv = st_get_fp_variant(st, st->fp, &key);
-
- return fpv;
-}
-
-
-/**
* Clamp glDrawPixels width and height to the maximum texture size.
*/
static void
@@ -1109,8 +972,8 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
GLboolean write_stencil = GL_FALSE, write_depth = GL_FALSE;
struct pipe_sampler_view *sv[2] = { NULL };
int num_sampler_view = 1;
- struct st_fp_variant *fpv;
struct gl_pixelstore_attrib clippedUnpack;
+ struct st_fp_variant *fpv = NULL;
/* Mesa state should be up to date by now */
assert(ctx->NewState == 0x0);
@@ -1144,31 +1007,27 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
* Get vertex/fragment shaders
*/
if (write_depth || write_stencil) {
- fpv = get_depth_stencil_fp_variant(st, write_depth, write_stencil);
-
- driver_fp = fpv->driver_shader;
-
+ driver_fp = get_drawpix_z_stencil_program(st, write_depth,
+ write_stencil);
driver_vp = make_passthrough_vertex_shader(st, GL_TRUE);
-
color = ctx->Current.RasterColor;
}
else {
fpv = get_color_fp_variant(st);
driver_fp = fpv->driver_shader;
-
driver_vp = make_passthrough_vertex_shader(st, GL_FALSE);
color = NULL;
- if (st->pixel_xfer.pixelmap_enabled) {
+ if (ctx->Pixel.MapColorFlag) {
pipe_sampler_view_reference(&sv[1],
st->pixel_xfer.pixelmap_sampler_view);
num_sampler_view++;
}
- }
- /* update fragment program constants */
- st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT);
+ /* update fragment program constants */
+ st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT);
+ }
/* draw with textured quad */
{
@@ -1197,7 +1056,7 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
sv,
num_sampler_view,
driver_vp,
- driver_fp,
+ driver_fp, fpv,
color, GL_FALSE, write_depth, write_stencil);
pipe_sampler_view_reference(&sv[0], NULL);
if (num_sampler_view > 1)
@@ -1452,6 +1311,7 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
void *driver_vp, *driver_fp;
struct pipe_resource *pt;
struct pipe_sampler_view *sv[2] = { NULL };
+ struct st_fp_variant *fpv = NULL;
int num_sampler_view = 1;
GLfloat *color;
enum pipe_format srcFormat;
@@ -1459,7 +1319,6 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
GLboolean invertTex = GL_FALSE;
GLint readX, readY, readW, readH;
struct gl_pixelstore_attrib pack = ctx->DefaultPacking;
- struct st_fp_variant *fpv;
st_validate_state(st);
@@ -1491,19 +1350,22 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
* Get vertex/fragment shaders
*/
if (type == GL_COLOR) {
+ fpv = get_color_fp_variant(st);
+
rbRead = st_get_color_read_renderbuffer(ctx);
color = NULL;
- fpv = get_color_fp_variant(st);
driver_fp = fpv->driver_shader;
-
driver_vp = make_passthrough_vertex_shader(st, GL_FALSE);
- if (st->pixel_xfer.pixelmap_enabled) {
+ if (ctx->Pixel.MapColorFlag) {
pipe_sampler_view_reference(&sv[1],
st->pixel_xfer.pixelmap_sampler_view);
num_sampler_view++;
}
+
+ /* update fragment program constants */
+ st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT);
}
else {
assert(type == GL_DEPTH);
@@ -1511,15 +1373,10 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
Attachment[BUFFER_DEPTH].Renderbuffer);
color = ctx->Current.Attrib[VERT_ATTRIB_COLOR0];
- fpv = get_depth_stencil_fp_variant(st, GL_TRUE, GL_FALSE);
- driver_fp = fpv->driver_shader;
-
+ driver_fp = get_drawpix_z_stencil_program(st, GL_TRUE, GL_FALSE);
driver_vp = make_passthrough_vertex_shader(st, GL_TRUE);
}
- /* update fragment program constants */
- st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT);
-
/* Choose the format for the temporary texture. */
srcFormat = rbRead->texture->format;
srcBind = PIPE_BIND_SAMPLER_VIEW |
@@ -1645,7 +1502,7 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
sv,
num_sampler_view,
driver_vp,
- driver_fp,
+ driver_fp, fpv,
color, invertTex, GL_FALSE, GL_FALSE);
pipe_resource_reference(&pt, NULL);
@@ -1666,12 +1523,12 @@ st_destroy_drawpix(struct st_context *st)
{
GLuint i;
- for (i = 0; i < ARRAY_SIZE(st->drawpix.shaders); i++) {
- if (st->drawpix.shaders[i])
- _mesa_reference_fragprog(st->ctx, &st->drawpix.shaders[i], NULL);
+ for (i = 0; i < ARRAY_SIZE(st->drawpix.zs_shaders); i++) {
+ if (st->drawpix.zs_shaders[i])
+ cso_delete_fragment_shader(st->cso_context,
+ st->drawpix.zs_shaders[i]);
}
- st_reference_fragprog(st, &st->pixel_xfer.combined_prog, NULL);
if (st->drawpix.vert_shaders[0])
cso_delete_vertex_shader(st->cso_context, st->drawpix.vert_shaders[0]);
if (st->drawpix.vert_shaders[1])
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.h b/src/mesa/state_tracker/st_cb_drawpixels.h
index c707ace2f9f..f1fb32dd6cf 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.h
+++ b/src/mesa/state_tracker/st_cb_drawpixels.h
@@ -31,6 +31,7 @@
#include "main/compiler.h"
+#include <stdbool.h>
struct dd_function_table;
struct st_context;
@@ -40,15 +41,11 @@ extern void st_init_drawpixels_functions(struct dd_function_table *functions);
extern void
st_destroy_drawpix(struct st_context *st);
-extern void
-st_make_drawpix_fragment_program(struct st_context *st,
- struct gl_fragment_program *fpIn,
- struct gl_fragment_program **fpOut);
-
-extern struct gl_fragment_program *
-st_make_drawpix_z_stencil_program(struct st_context *st,
- GLboolean write_depth,
- GLboolean write_stencil);
-
+extern const struct tgsi_token *
+st_get_drawpix_shader(const struct tgsi_token *tokens, bool use_texcoord,
+ bool scale_and_bias, unsigned scale_const,
+ unsigned bias_const, bool pixel_maps,
+ unsigned drawpix_sampler, unsigned pixelmap_sampler,
+ unsigned texcoord_const);
#endif /* ST_CB_DRAWPIXELS_H */
diff --git a/src/mesa/state_tracker/st_cb_drawpixels_shader.c b/src/mesa/state_tracker/st_cb_drawpixels_shader.c
new file mode 100644
index 00000000000..749b46cfbf7
--- /dev/null
+++ b/src/mesa/state_tracker/st_cb_drawpixels_shader.c
@@ -0,0 +1,278 @@
+/**************************************************************************
+ *
+ * Copyright (C) 2015 Advanced Micro Devices, Inc.
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "st_cb_drawpixels.h"
+#include "tgsi/tgsi_transform.h"
+#include "tgsi/tgsi_scan.h"
+
+struct tgsi_drawpix_transform {
+ struct tgsi_transform_context base;
+ struct tgsi_shader_info info;
+ bool use_texcoord;
+ bool scale_and_bias;
+ bool pixel_maps;
+ bool first_instruction_emitted;
+ unsigned scale_const;
+ unsigned bias_const;
+ unsigned color_temp;
+ unsigned drawpix_sampler;
+ unsigned pixelmap_sampler;
+ unsigned texcoord_const;
+};
+
+static inline struct tgsi_drawpix_transform *
+tgsi_drawpix_transform(struct tgsi_transform_context *tctx)
+{
+ return (struct tgsi_drawpix_transform *)tctx;
+}
+
+static void
+set_src(struct tgsi_full_instruction *inst, unsigned i, unsigned file, unsigned index,
+ unsigned x, unsigned y, unsigned z, unsigned w)
+{
+ inst->Src[i].Register.File = file;
+ inst->Src[i].Register.Index = index;
+ inst->Src[i].Register.SwizzleX = x;
+ inst->Src[i].Register.SwizzleY = y;
+ inst->Src[i].Register.SwizzleZ = z;
+ inst->Src[i].Register.SwizzleW = w;
+}
+
+#define SET_SRC(inst, i, file, index, x, y, z, w) \
+ set_src(inst, i, file, index, TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, \
+ TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w)
+
+static void
+transform_instr(struct tgsi_transform_context *tctx,
+ struct tgsi_full_instruction *current_inst)
+{
+ struct tgsi_drawpix_transform *ctx = tgsi_drawpix_transform(tctx);
+ struct tgsi_full_declaration decl;
+ struct tgsi_full_instruction inst;
+ unsigned i, sem_texcoord = ctx->use_texcoord ? TGSI_SEMANTIC_TEXCOORD :
+ TGSI_SEMANTIC_GENERIC;
+ int texcoord_index = -1;
+
+ if (ctx->first_instruction_emitted)
+ goto transform_inst;
+
+ ctx->first_instruction_emitted = true;
+
+ /* Add scale and bias constants. */
+ if (ctx->scale_and_bias) {
+ if (ctx->info.const_file_max[0] < (int)ctx->scale_const) {
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_CONSTANT;
+ decl.Range.First = decl.Range.Last = ctx->scale_const;
+ tctx->emit_declaration(tctx, &decl);
+ }
+
+ if (ctx->info.const_file_max[0] < (int)ctx->bias_const) {
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_CONSTANT;
+ decl.Range.First = decl.Range.Last = ctx->bias_const;
+ tctx->emit_declaration(tctx, &decl);
+ }
+ }
+
+ if (ctx->info.const_file_max[0] < (int)ctx->texcoord_const) {
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_CONSTANT;
+ decl.Range.First = decl.Range.Last = ctx->texcoord_const;
+ tctx->emit_declaration(tctx, &decl);
+ }
+
+ /* Add a new temp. */
+ ctx->color_temp = ctx->info.file_max[TGSI_FILE_TEMPORARY] + 1;
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_TEMPORARY;
+ decl.Range.First = decl.Range.Last = ctx->color_temp;
+ tctx->emit_declaration(tctx, &decl);
+
+ /* Add TEXCOORD[texcoord_slot] if it's missing. */
+ for (i = 0; i < ctx->info.num_inputs; i++) {
+ if (ctx->info.input_semantic_name[i] == sem_texcoord &&
+ ctx->info.input_semantic_index[i] == 0) {
+ texcoord_index = i;
+ break;
+ }
+ }
+
+ if (texcoord_index == -1) {
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_INPUT;
+ decl.Declaration.Semantic = 1;
+ decl.Semantic.Name = sem_texcoord;
+ decl.Declaration.Interpolate = 1;
+ decl.Interp.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
+ decl.Range.First = decl.Range.Last = ctx->info.num_inputs;
+ texcoord_index = ctx->info.num_inputs;
+ tctx->emit_declaration(tctx, &decl);
+ }
+
+ /* Declare the drawpix sampler if it's missing. */
+ if (!(ctx->info.samplers_declared & (1 << ctx->drawpix_sampler))) {
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_SAMPLER;
+ decl.Range.First = decl.Range.Last = ctx->drawpix_sampler;
+ tctx->emit_declaration(tctx, &decl);
+ }
+
+ /* Declare the pixel map sampler if it's missing. */
+ if (ctx->pixel_maps &&
+ !(ctx->info.samplers_declared & (1 << ctx->pixelmap_sampler))) {
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_SAMPLER;
+ decl.Range.First = decl.Range.Last = ctx->pixelmap_sampler;
+ tctx->emit_declaration(tctx, &decl);
+ }
+
+ /* Get initial pixel color from the texture.
+ * TEX temp, fragment.texcoord[0], texture[0], 2D;
+ */
+ inst = tgsi_default_full_instruction();
+ inst.Instruction.Opcode = TGSI_OPCODE_TEX;
+ inst.Instruction.Texture = 1;
+ inst.Texture.Texture = TGSI_TEXTURE_2D;
+
+ inst.Instruction.NumDstRegs = 1;
+ inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+ inst.Dst[0].Register.Index = ctx->color_temp;
+ inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
+
+ inst.Instruction.NumSrcRegs = 2;
+ SET_SRC(&inst, 0, TGSI_FILE_INPUT, texcoord_index, X, Y, Z, W);
+ inst.Src[1].Register.File = TGSI_FILE_SAMPLER;
+ inst.Src[1].Register.Index = ctx->drawpix_sampler;
+
+ tctx->emit_instruction(tctx, &inst);
+
+ /* Apply the scale and bias. */
+ if (ctx->scale_and_bias) {
+ /* MAD temp, temp, scale, bias; */
+ inst = tgsi_default_full_instruction();
+ inst.Instruction.Opcode = TGSI_OPCODE_MAD;
+
+ inst.Instruction.NumDstRegs = 1;
+ inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+ inst.Dst[0].Register.Index = ctx->color_temp;
+ inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
+
+ inst.Instruction.NumSrcRegs = 3;
+ SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->color_temp, X, Y, Z, W);
+ SET_SRC(&inst, 1, TGSI_FILE_CONSTANT, ctx->scale_const, X, Y, Z, W);
+ SET_SRC(&inst, 2, TGSI_FILE_CONSTANT, ctx->bias_const, X, Y, Z, W);
+
+ tctx->emit_instruction(tctx, &inst);
+ }
+
+ if (ctx->pixel_maps) {
+ /* do four pixel map look-ups with two TEX instructions: */
+
+ /* TEX temp.xy, temp.xyyy, texture[1], 2D; */
+ inst = tgsi_default_full_instruction();
+ inst.Instruction.Opcode = TGSI_OPCODE_TEX;
+ inst.Instruction.Texture = 1;
+ inst.Texture.Texture = TGSI_TEXTURE_2D;
+
+ inst.Instruction.NumDstRegs = 1;
+ inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+ inst.Dst[0].Register.Index = ctx->color_temp;
+ inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XY;
+
+ inst.Instruction.NumSrcRegs = 2;
+ SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->color_temp, X, Y, Y, Y);
+ inst.Src[1].Register.File = TGSI_FILE_SAMPLER;
+ inst.Src[1].Register.Index = ctx->pixelmap_sampler;
+
+ tctx->emit_instruction(tctx, &inst);
+
+ /* TEX temp.zw, temp.zwww, texture[1], 2D; */
+ inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_ZW;
+ SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->color_temp, Z, W, W, W);
+ tctx->emit_instruction(tctx, &inst);
+ }
+
+ /* Now, "color_temp" should be used in place of IN:COLOR0,
+ * and CONST[texcoord_slot] should be used in place of IN:TEXCOORD0.
+ */
+
+transform_inst:
+
+ for (i = 0; i < current_inst->Instruction.NumSrcRegs; i++) {
+ struct tgsi_full_src_register *src = &current_inst->Src[i];
+ unsigned reg = src->Register.Index;
+
+ if (src->Register.File != TGSI_FILE_INPUT || src->Register.Indirect)
+ continue;
+
+ if (ctx->info.input_semantic_name[reg] == TGSI_SEMANTIC_COLOR &&
+ ctx->info.input_semantic_index[reg] == 0) {
+ src->Register.File = TGSI_FILE_TEMPORARY;
+ src->Register.Index = ctx->color_temp;
+ } else if (ctx->info.input_semantic_name[reg] == sem_texcoord &&
+ ctx->info.input_semantic_index[reg] == 0) {
+ src->Register.File = TGSI_FILE_CONSTANT;
+ src->Register.Index = ctx->texcoord_const;
+ }
+ }
+
+ tctx->emit_instruction(tctx, current_inst);
+}
+
+const struct tgsi_token *
+st_get_drawpix_shader(const struct tgsi_token *tokens, bool use_texcoord,
+ bool scale_and_bias, unsigned scale_const,
+ unsigned bias_const, bool pixel_maps,
+ unsigned drawpix_sampler, unsigned pixelmap_sampler,
+ unsigned texcoord_const)
+{
+ struct tgsi_drawpix_transform ctx;
+ struct tgsi_token *newtoks;
+ int newlen;
+
+ memset(&ctx, 0, sizeof(ctx));
+ ctx.base.transform_instruction = transform_instr;
+ ctx.use_texcoord = use_texcoord;
+ ctx.scale_and_bias = scale_and_bias;
+ ctx.scale_const = scale_const;
+ ctx.bias_const = bias_const;
+ ctx.pixel_maps = pixel_maps;
+ ctx.drawpix_sampler = drawpix_sampler;
+ ctx.pixelmap_sampler = pixelmap_sampler;
+ ctx.texcoord_const = texcoord_const;
+ tgsi_scan_shader(tokens, &ctx.info);
+
+ newlen = tgsi_num_tokens(tokens) + 30;
+ newtoks = tgsi_alloc_tokens(newlen);
+ if (!newtoks)
+ return NULL;
+
+ tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
+ return newtoks;
+}
diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c
index ff703fa41cb..2a2eb0992c8 100644
--- a/src/mesa/state_tracker/st_cb_fbo.c
+++ b/src/mesa/state_tracker/st_cb_fbo.c
@@ -456,7 +456,7 @@ st_update_renderbuffer_surface(struct st_context *st,
surf_tmpl.u.tex.first_layer = first_layer;
surf_tmpl.u.tex.last_layer = last_layer;
- pipe_surface_reference(&strb->surface, NULL);
+ pipe_surface_release(pipe, &strb->surface);
strb->surface = pipe->create_surface(pipe, resource, &surf_tmpl);
}
diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c
index 3029909d12d..708bdf5011e 100644
--- a/src/mesa/state_tracker/st_cb_program.c
+++ b/src/mesa/state_tracker/st_cb_program.c
@@ -105,29 +105,24 @@ st_new_program(struct gl_context *ctx, GLenum target, GLuint id)
switch (target) {
case GL_VERTEX_PROGRAM_ARB: {
struct st_vertex_program *prog = ST_CALLOC_STRUCT(st_vertex_program);
- return _mesa_init_vertex_program(ctx, &prog->Base, target, id);
+ return _mesa_init_gl_program(&prog->Base.Base, target, id);
}
-
case GL_FRAGMENT_PROGRAM_ARB: {
struct st_fragment_program *prog = ST_CALLOC_STRUCT(st_fragment_program);
- return _mesa_init_fragment_program(ctx, &prog->Base, target, id);
+ return _mesa_init_gl_program(&prog->Base.Base, target, id);
}
-
case GL_GEOMETRY_PROGRAM_NV: {
struct st_geometry_program *prog = ST_CALLOC_STRUCT(st_geometry_program);
- return _mesa_init_geometry_program(ctx, &prog->Base, target, id);
+ return _mesa_init_gl_program(&prog->Base.Base, target, id);
}
-
case GL_TESS_CONTROL_PROGRAM_NV: {
struct st_tessctrl_program *prog = ST_CALLOC_STRUCT(st_tessctrl_program);
- return _mesa_init_tess_ctrl_program(ctx, &prog->Base, target, id);
+ return _mesa_init_gl_program(&prog->Base.Base, target, id);
}
-
case GL_TESS_EVALUATION_PROGRAM_NV: {
struct st_tesseval_program *prog = ST_CALLOC_STRUCT(st_tesseval_program);
- return _mesa_init_tess_eval_program(ctx, &prog->Base, target, id);
+ return _mesa_init_gl_program(&prog->Base.Base, target, id);
}
-
default:
assert(0);
return NULL;
@@ -234,6 +229,8 @@ st_program_string_notify( struct gl_context *ctx,
struct st_fragment_program *stfp = (struct st_fragment_program *) prog;
st_release_fp_variants(st, stfp);
+ if (!st_translate_fragment_program(st, stfp))
+ return false;
if (st->fp == stfp)
st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM;
@@ -242,6 +239,8 @@ st_program_string_notify( struct gl_context *ctx,
struct st_geometry_program *stgp = (struct st_geometry_program *) prog;
st_release_gp_variants(st, stgp);
+ if (!st_translate_geometry_program(st, stgp))
+ return false;
if (st->gp == stgp)
st->dirty.st |= ST_NEW_GEOMETRY_PROGRAM;
@@ -249,7 +248,9 @@ st_program_string_notify( struct gl_context *ctx,
else if (target == GL_VERTEX_PROGRAM_ARB) {
struct st_vertex_program *stvp = (struct st_vertex_program *) prog;
- st_release_vp_variants( st, stvp );
+ st_release_vp_variants(st, stvp);
+ if (!st_translate_vertex_program(st, stvp))
+ return false;
if (st->vp == stvp)
st->dirty.st |= ST_NEW_VERTEX_PROGRAM;
@@ -259,6 +260,8 @@ st_program_string_notify( struct gl_context *ctx,
(struct st_tessctrl_program *) prog;
st_release_tcp_variants(st, sttcp);
+ if (!st_translate_tessctrl_program(st, sttcp))
+ return false;
if (st->tcp == sttcp)
st->dirty.st |= ST_NEW_TESSCTRL_PROGRAM;
@@ -268,6 +271,8 @@ st_program_string_notify( struct gl_context *ctx,
(struct st_tesseval_program *) prog;
st_release_tep_variants(st, sttep);
+ if (!st_translate_tesseval_program(st, sttep))
+ return false;
if (st->tep == sttep)
st->dirty.st |= ST_NEW_TESSEVAL_PROGRAM;
diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
index a9ab5edcf49..bef7307bb27 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -224,8 +224,6 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe,
st->ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
- st->pixel_xfer.cache = _mesa_new_program_cache();
-
st->has_stencil_export =
screen->get_param(screen, PIPE_CAP_SHADER_STENCIL_EXPORT);
st->has_shader_model3 = screen->get_param(screen, PIPE_CAP_SM3);
@@ -386,8 +384,8 @@ void st_destroy_context( struct st_context *st )
pipe_surface_reference(&st->state.framebuffer.cbufs[i], NULL);
}
pipe_surface_reference(&st->state.framebuffer.zsbuf, NULL);
-
- _mesa_delete_program_cache(st->ctx, st->pixel_xfer.cache);
+ pipe_sampler_view_reference(&st->pixel_xfer.pixelmap_sampler_view, NULL);
+ pipe_resource_reference(&st->pixel_xfer.pixelmap_texture, NULL);
_vbo_DestroyContext(st->ctx);
diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h
index a4cda29059d..f187d82449b 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -162,15 +162,8 @@ struct st_context
struct gl_texture_object *default_texture;
struct {
- struct gl_program_cache *cache;
- struct st_fragment_program *program; /**< cur pixel transfer prog */
- GLuint xfer_prog_sn; /**< pixel xfer program serial no. */
- GLuint user_prog_sn; /**< user fragment program serial no. */
- struct st_fragment_program *combined_prog;
- GLuint combined_prog_sn;
struct pipe_resource *pixelmap_texture;
struct pipe_sampler_view *pixelmap_sampler_view;
- boolean pixelmap_enabled; /**< use the pixelmap texture? */
} pixel_xfer;
/** for glBitmap */
@@ -184,7 +177,7 @@ struct st_context
/** for glDraw/CopyPixels */
struct {
- struct gl_fragment_program *shaders[4];
+ void *zs_shaders[4];
void *vert_shaders[2]; /**< ureg shaders */
} drawpix;
diff --git a/src/mesa/state_tracker/st_debug.c b/src/mesa/state_tracker/st_debug.c
index 50891c112cb..6d859c6ab5b 100644
--- a/src/mesa/state_tracker/st_debug.c
+++ b/src/mesa/state_tracker/st_debug.c
@@ -98,7 +98,7 @@ st_print_current(void)
if (st->vp->Base.Base.Parameters)
_mesa_print_parameter_list(st->vp->Base.Base.Parameters);
- tgsi_dump( st->fp->variants[0].tgsi.tokens, 0 );
+ tgsi_dump(st->fp->tgsi.tokens, 0);
if (st->fp->Base.Base.Parameters)
_mesa_print_parameter_list(st->fp->Base.Base.Parameters);
}
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 633e90ffa38..f481e8902d8 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -4334,216 +4334,6 @@ glsl_to_tgsi_visitor::renumber_registers(void)
ralloc_free(first_reads);
}
-/**
- * Returns a fragment program which implements the current pixel transfer ops.
- * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c.
- */
-extern "C" void
-get_pixel_transfer_visitor(struct st_fragment_program *fp,
- glsl_to_tgsi_visitor *original,
- int scale_and_bias, int pixel_maps)
-{
- glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
- struct st_context *st = st_context(original->ctx);
- struct gl_program *prog = &fp->Base.Base;
- struct gl_program_parameter_list *params = _mesa_new_parameter_list();
- st_src_reg coord, src0;
- st_dst_reg dst0;
- glsl_to_tgsi_instruction *inst;
-
- /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
- v->ctx = original->ctx;
- v->prog = prog;
- v->shader_program = NULL;
- v->shader = NULL;
- v->glsl_version = original->glsl_version;
- v->native_integers = original->native_integers;
- v->options = original->options;
- v->next_temp = original->next_temp;
- v->num_address_regs = original->num_address_regs;
- v->samplers_used = prog->SamplersUsed = original->samplers_used;
- v->indirect_addr_consts = original->indirect_addr_consts;
- memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
- v->num_immediates = original->num_immediates;
-
- /*
- * Get initial pixel color from the texture.
- * TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
- */
- coord = st_src_reg(PROGRAM_INPUT, VARYING_SLOT_TEX0, glsl_type::vec2_type);
- src0 = v->get_temp(glsl_type::vec4_type);
- dst0 = st_dst_reg(src0);
- inst = v->emit_asm(NULL, TGSI_OPCODE_TEX, dst0, coord);
- inst->sampler_array_size = 1;
- inst->tex_target = TEXTURE_2D_INDEX;
-
- prog->InputsRead |= VARYING_BIT_TEX0;
- prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */
- v->samplers_used |= (1 << 0);
-
- if (scale_and_bias) {
- static const gl_state_index scale_state[STATE_LENGTH] =
- { STATE_INTERNAL, STATE_PT_SCALE,
- (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
- static const gl_state_index bias_state[STATE_LENGTH] =
- { STATE_INTERNAL, STATE_PT_BIAS,
- (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
- GLint scale_p, bias_p;
- st_src_reg scale, bias;
-
- scale_p = _mesa_add_state_reference(params, scale_state);
- bias_p = _mesa_add_state_reference(params, bias_state);
-
- /* MAD colorTemp, colorTemp, scale, bias; */
- scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT);
- bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT);
- inst = v->emit_asm(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias);
- }
-
- if (pixel_maps) {
- st_src_reg temp = v->get_temp(glsl_type::vec4_type);
- st_dst_reg temp_dst = st_dst_reg(temp);
-
- assert(st->pixel_xfer.pixelmap_texture);
- (void) st;
-
- /* With a little effort, we can do four pixel map look-ups with
- * two TEX instructions:
- */
-
- /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */
- temp_dst.writemask = WRITEMASK_XY; /* write R,G */
- inst = v->emit_asm(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
- inst->sampler.index = 1;
- inst->sampler_array_size = 1;
- inst->tex_target = TEXTURE_2D_INDEX;
-
- /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */
- src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
- temp_dst.writemask = WRITEMASK_ZW; /* write B,A */
- inst = v->emit_asm(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
- inst->sampler.index = 1;
- inst->sampler_array_size = 1;
- inst->tex_target = TEXTURE_2D_INDEX;
-
- prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */
- v->samplers_used |= (1 << 1);
-
- /* MOV colorTemp, temp; */
- inst = v->emit_asm(NULL, TGSI_OPCODE_MOV, dst0, temp);
- }
-
- /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
- * new visitor. */
- foreach_in_list(glsl_to_tgsi_instruction, inst, &original->instructions) {
- glsl_to_tgsi_instruction *newinst;
- st_src_reg src_regs[4];
-
- if (inst->dst[0].file == PROGRAM_OUTPUT)
- prog->OutputsWritten |= BITFIELD64_BIT(inst->dst[0].index);
-
- for (int i = 0; i < 4; i++) {
- src_regs[i] = inst->src[i];
- if (src_regs[i].file == PROGRAM_INPUT &&
- src_regs[i].index == VARYING_SLOT_COL0) {
- src_regs[i].file = PROGRAM_TEMPORARY;
- src_regs[i].index = src0.index;
- }
- else if (src_regs[i].file == PROGRAM_INPUT)
- prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
- }
-
- newinst = v->emit_asm(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2], src_regs[3]);
- newinst->tex_target = inst->tex_target;
- newinst->sampler_array_size = inst->sampler_array_size;
- }
-
- /* Make modifications to fragment program info. */
- prog->Parameters = _mesa_combine_parameter_lists(params,
- original->prog->Parameters);
- _mesa_free_parameter_list(params);
- count_resources(v, prog);
- fp->glsl_to_tgsi = v;
-}
-
-/**
- * Make fragment program for glBitmap:
- * Sample the texture and kill the fragment if the bit is 0.
- * This program will be combined with the user's fragment program.
- *
- * Based on make_bitmap_fragment_program in st_cb_bitmap.c.
- */
-extern "C" void
-get_bitmap_visitor(struct st_fragment_program *fp,
- glsl_to_tgsi_visitor *original, int samplerIndex)
-{
- glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
- struct st_context *st = st_context(original->ctx);
- struct gl_program *prog = &fp->Base.Base;
- st_src_reg coord, src0;
- st_dst_reg dst0;
- glsl_to_tgsi_instruction *inst;
-
- /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
- v->ctx = original->ctx;
- v->prog = prog;
- v->shader_program = NULL;
- v->shader = NULL;
- v->glsl_version = original->glsl_version;
- v->native_integers = original->native_integers;
- v->options = original->options;
- v->next_temp = original->next_temp;
- v->num_address_regs = original->num_address_regs;
- v->samplers_used = prog->SamplersUsed = original->samplers_used;
- v->indirect_addr_consts = original->indirect_addr_consts;
- memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
- v->num_immediates = original->num_immediates;
-
- /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
- coord = st_src_reg(PROGRAM_INPUT, VARYING_SLOT_TEX0, glsl_type::vec2_type);
- src0 = v->get_temp(glsl_type::vec4_type);
- dst0 = st_dst_reg(src0);
- inst = v->emit_asm(NULL, TGSI_OPCODE_TEX, dst0, coord);
- inst->sampler.index = samplerIndex;
- inst->sampler_array_size = 1;
- inst->tex_target = TEXTURE_2D_INDEX;
-
- prog->InputsRead |= VARYING_BIT_TEX0;
- prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */
- v->samplers_used |= (1 << samplerIndex);
-
- /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */
- src0.negate = NEGATE_XYZW;
- if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
- src0.swizzle = SWIZZLE_XXXX;
- inst = v->emit_asm(NULL, TGSI_OPCODE_KILL_IF, undef_dst, src0);
-
- /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
- * new visitor. */
- foreach_in_list(glsl_to_tgsi_instruction, inst, &original->instructions) {
- glsl_to_tgsi_instruction *newinst;
- st_src_reg src_regs[4];
-
- if (inst->dst[0].file == PROGRAM_OUTPUT)
- prog->OutputsWritten |= BITFIELD64_BIT(inst->dst[0].index);
-
- for (int i = 0; i < 4; i++) {
- src_regs[i] = inst->src[i];
- if (src_regs[i].file == PROGRAM_INPUT)
- prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
- }
-
- newinst = v->emit_asm(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2], src_regs[3]);
- newinst->tex_target = inst->tex_target;
- newinst->sampler_array_size = inst->sampler_array_size;
- }
-
- /* Make modifications to fragment program info. */
- prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters);
- count_resources(v, prog);
- fp->glsl_to_tgsi = v;
-}
-
/* ------------------------- TGSI conversion stuff -------------------------- */
struct label {
unsigned branch_target;
@@ -4852,7 +4642,7 @@ src_register(struct st_translate *t, const st_src_reg *reg)
static struct ureg_dst
translate_dst(struct st_translate *t,
const st_dst_reg *dst_reg,
- bool saturate, bool clamp_color)
+ bool saturate)
{
struct ureg_dst dst = dst_register(t, dst_reg->file, dst_reg->index,
dst_reg->array_id);
@@ -4864,28 +4654,6 @@ translate_dst(struct st_translate *t,
if (saturate)
dst = ureg_saturate(dst);
- else if (clamp_color && dst_reg->file == PROGRAM_OUTPUT) {
- /* Clamp colors for ARB_color_buffer_float. */
- switch (t->procType) {
- case TGSI_PROCESSOR_VERTEX:
- /* This can only occur with a compatibility profile, which doesn't
- * support geometry shaders. */
- if (dst_reg->index == VARYING_SLOT_COL0 ||
- dst_reg->index == VARYING_SLOT_COL1 ||
- dst_reg->index == VARYING_SLOT_BFC0 ||
- dst_reg->index == VARYING_SLOT_BFC1) {
- dst = ureg_saturate(dst);
- }
- break;
-
- case TGSI_PROCESSOR_FRAGMENT:
- if (dst_reg->index == FRAG_RESULT_COLOR ||
- dst_reg->index >= FRAG_RESULT_DATA0) {
- dst = ureg_saturate(dst);
- }
- break;
- }
- }
if (dst_reg->reladdr != NULL) {
assert(dst_reg->file != PROGRAM_TEMPORARY);
@@ -4991,8 +4759,7 @@ translate_tex_offset(struct st_translate *t,
static void
compile_tgsi_instruction(struct st_translate *t,
- const glsl_to_tgsi_instruction *inst,
- bool clamp_dst_color_output)
+ const glsl_to_tgsi_instruction *inst)
{
struct ureg_program *ureg = t->ureg;
GLuint i;
@@ -5010,8 +4777,7 @@ compile_tgsi_instruction(struct st_translate *t,
for (i = 0; i < num_dst; i++)
dst[i] = translate_dst(t,
&inst->dst[i],
- inst->saturate,
- clamp_dst_color_output);
+ inst->saturate);
for (i = 0; i < num_src; i++)
src[i] = translate_src(t, &inst->src[i]);
@@ -5286,16 +5052,6 @@ emit_face_var(struct gl_context *ctx, struct st_translate *t)
t->inputs[t->inputMapping[VARYING_SLOT_FACE]] = ureg_src(face_temp);
}
-static void
-emit_edgeflags(struct st_translate *t)
-{
- struct ureg_program *ureg = t->ureg;
- struct ureg_dst edge_dst = t->outputs[t->outputMapping[VARYING_SLOT_EDGE]];
- struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]];
-
- ureg_MOV(ureg, edge_dst, edge_src);
-}
-
static bool
find_array(unsigned attr, struct array_decl *arrays, unsigned count,
unsigned *array_id, unsigned *array_size)
@@ -5353,9 +5109,7 @@ st_translate_program(
const GLuint outputMapping[],
const GLuint outputSlotToAttr[],
const ubyte outputSemanticName[],
- const ubyte outputSemanticIndex[],
- boolean passthrough_edgeflags,
- boolean clamp_color)
+ const ubyte outputSemanticIndex[])
{
struct st_translate *t;
unsigned i;
@@ -5544,8 +5298,6 @@ st_translate_program(
t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X);
}
}
- if (passthrough_edgeflags)
- emit_edgeflags(t);
}
/* Declare address register.
@@ -5639,7 +5391,7 @@ st_translate_program(
unsigned num_ubos = program->shader->NumUniformBlocks;
for (i = 0; i < num_ubos; i++) {
- unsigned size = program->shader->UniformBlocks[i].UniformBufferSize;
+ unsigned size = program->shader->UniformBlocks[i]->UniformBufferSize;
unsigned num_const_vecs = (size + 15) / 16;
unsigned first, last;
assert(num_const_vecs > 0);
@@ -5696,7 +5448,7 @@ st_translate_program(
*/
foreach_in_list(glsl_to_tgsi_instruction, inst, &program->instructions) {
set_insn_start(t, ureg_get_instruction_number(ureg));
- compile_tgsi_instruction(t, inst, clamp_color);
+ compile_tgsi_instruction(t, inst);
}
/* Fix up all emitted labels:
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h
index 4af747fa9de..729295bcb52 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.h
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.h
@@ -52,17 +52,9 @@ enum pipe_error st_translate_program(
const GLuint outputMapping[],
const GLuint outputSlotToAttr[],
const ubyte outputSemanticName[],
- const ubyte outputSemanticIndex[],
- boolean passthrough_edgeflags,
- boolean clamp_color);
+ const ubyte outputSemanticIndex[]);
void free_glsl_to_tgsi_visitor(struct glsl_to_tgsi_visitor *v);
-void get_pixel_transfer_visitor(struct st_fragment_program *fp,
- struct glsl_to_tgsi_visitor *original,
- int scale_and_bias, int pixel_maps);
-void get_bitmap_visitor(struct st_fragment_program *fp,
- struct glsl_to_tgsi_visitor *original,
- int samplerIndex);
GLboolean st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog);
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index 896e239ee68..4b9dc994ea5 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -283,8 +283,7 @@ st_translate_texture_target( GLuint textarget,
static struct ureg_dst
translate_dst( struct st_translate *t,
const struct prog_dst_register *DstReg,
- boolean saturate,
- boolean clamp_color)
+ boolean saturate)
{
struct ureg_dst dst = dst_register( t,
DstReg->File,
@@ -295,27 +294,6 @@ translate_dst( struct st_translate *t,
if (saturate)
dst = ureg_saturate( dst );
- else if (clamp_color && DstReg->File == PROGRAM_OUTPUT) {
- /* Clamp colors for ARB_color_buffer_float. */
- switch (t->procType) {
- case TGSI_PROCESSOR_VERTEX:
- /* This can only occur with a compatibility profile, which doesn't
- * support geometry shaders. */
- if (DstReg->Index == VARYING_SLOT_COL0 ||
- DstReg->Index == VARYING_SLOT_COL1 ||
- DstReg->Index == VARYING_SLOT_BFC0 ||
- DstReg->Index == VARYING_SLOT_BFC1) {
- dst = ureg_saturate(dst);
- }
- break;
-
- case TGSI_PROCESSOR_FRAGMENT:
- if (DstReg->Index >= FRAG_RESULT_COLOR) {
- dst = ureg_saturate(dst);
- }
- break;
- }
- }
if (DstReg->RelAddr)
dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) );
@@ -649,8 +627,7 @@ static void
compile_instruction(
struct gl_context *ctx,
struct st_translate *t,
- const struct prog_instruction *inst,
- boolean clamp_dst_color_output)
+ const struct prog_instruction *inst)
{
struct ureg_program *ureg = t->ureg;
GLuint i;
@@ -665,8 +642,7 @@ compile_instruction(
if (num_dst)
dst[0] = translate_dst( t,
&inst->DstReg,
- inst->Saturate,
- clamp_dst_color_output);
+ inst->Saturate);
for (i = 0; i < num_src; i++)
src[i] = translate_src( t, &inst->SrcReg[i] );
@@ -974,18 +950,6 @@ emit_face_var( struct st_translate *t,
}
-static void
-emit_edgeflags( struct st_translate *t,
- const struct gl_program *program )
-{
- struct ureg_program *ureg = t->ureg;
- struct ureg_dst edge_dst = t->outputs[t->outputMapping[VARYING_SLOT_EDGE]];
- struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]];
-
- ureg_MOV( ureg, edge_dst, edge_src );
-}
-
-
/**
* Translate Mesa program to TGSI format.
* \param program the program to translate
@@ -1019,9 +983,7 @@ st_translate_mesa_program(
GLuint numOutputs,
const GLuint outputMapping[],
const ubyte outputSemanticName[],
- const ubyte outputSemanticIndex[],
- boolean passthrough_edgeflags,
- boolean clamp_color)
+ const ubyte outputSemanticIndex[])
{
struct st_translate translate, *t;
unsigned i;
@@ -1125,8 +1087,6 @@ st_translate_mesa_program(
t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X);
}
}
- if (passthrough_edgeflags)
- emit_edgeflags( t, program );
}
/* Declare address register.
@@ -1231,7 +1191,7 @@ st_translate_mesa_program(
*/
for (i = 0; i < program->NumInstructions; i++) {
set_insn_start( t, ureg_get_instruction_number( ureg ));
- compile_instruction( ctx, t, &program->Instructions[i], clamp_color );
+ compile_instruction(ctx, t, &program->Instructions[i]);
}
/* Fix up all emitted labels:
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.h b/src/mesa/state_tracker/st_mesa_to_tgsi.h
index 62bb654e95a..ed7a3adfe1a 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.h
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.h
@@ -58,9 +58,7 @@ st_translate_mesa_program(
GLuint numOutputs,
const GLuint outputMapping[],
const ubyte outputSemanticName[],
- const ubyte outputSemanticIndex[],
- boolean passthrough_edgeflags,
- boolean clamp_color);
+ const ubyte outputSemanticIndex[]);
unsigned
st_translate_texture_target(GLuint textarget, GLboolean shadow);
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index a07f8fec309..6a69ba7aa26 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -43,6 +43,8 @@
#include "pipe/p_shader_tokens.h"
#include "draw/draw_context.h"
#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_emulate.h"
+#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_ureg.h"
#include "st_debug.h"
@@ -92,6 +94,11 @@ st_release_vp_variants( struct st_context *st,
}
stvp->variants = NULL;
+
+ if (stvp->tgsi.tokens) {
+ tgsi_free_tokens(stvp->tgsi.tokens);
+ stvp->tgsi.tokens = NULL;
+ }
}
@@ -107,8 +114,6 @@ delete_fp_variant(struct st_context *st, struct st_fp_variant *fpv)
cso_delete_fragment_shader(st->cso_context, fpv->driver_shader);
if (fpv->parameters)
_mesa_free_parameter_list(fpv->parameters);
- if (fpv->tgsi.tokens)
- ureg_free_tokens(fpv->tgsi.tokens);
free(fpv);
}
@@ -128,6 +133,11 @@ st_release_fp_variants(struct st_context *st, struct st_fragment_program *stfp)
}
stfp->variants = NULL;
+
+ if (stfp->tgsi.tokens) {
+ ureg_free_tokens(stfp->tgsi.tokens);
+ stfp->tgsi.tokens = NULL;
+ }
}
@@ -160,6 +170,11 @@ st_release_gp_variants(struct st_context *st, struct st_geometry_program *stgp)
}
stgp->variants = NULL;
+
+ if (stgp->tgsi.tokens) {
+ ureg_free_tokens(stgp->tgsi.tokens);
+ stgp->tgsi.tokens = NULL;
+ }
}
@@ -192,6 +207,11 @@ st_release_tcp_variants(struct st_context *st, struct st_tessctrl_program *sttcp
}
sttcp->variants = NULL;
+
+ if (sttcp->tgsi.tokens) {
+ ureg_free_tokens(sttcp->tgsi.tokens);
+ sttcp->tgsi.tokens = NULL;
+ }
}
@@ -224,28 +244,34 @@ st_release_tep_variants(struct st_context *st, struct st_tesseval_program *sttep
}
sttep->variants = NULL;
+
+ if (sttep->tgsi.tokens) {
+ ureg_free_tokens(sttep->tgsi.tokens);
+ sttep->tgsi.tokens = NULL;
+ }
}
/**
- * Translate a Mesa vertex shader into a TGSI shader.
- * \param outputMapping to map vertex program output registers (VARYING_SLOT_x)
- * to TGSI output slots
- * \param tokensOut destination for TGSI tokens
- * \return pointer to cached pipe_shader object.
+ * Translate a vertex program.
*/
-void
-st_prepare_vertex_program(struct gl_context *ctx,
+bool
+st_translate_vertex_program(struct st_context *st,
struct st_vertex_program *stvp)
{
- struct st_context *st = st_context(ctx);
- GLuint attr;
+ struct ureg_program *ureg;
+ enum pipe_error error;
+ unsigned num_outputs = 0;
+ unsigned attr;
+ unsigned input_to_index[VERT_ATTRIB_MAX] = {0};
+ unsigned output_slot_to_attr[VARYING_SLOT_MAX] = {0};
+ ubyte output_semantic_name[VARYING_SLOT_MAX] = {0};
+ ubyte output_semantic_index[VARYING_SLOT_MAX] = {0};
stvp->num_inputs = 0;
- stvp->num_outputs = 0;
if (stvp->Base.IsPositionInvariant)
- _mesa_insert_mvp_code(ctx, &stvp->Base);
+ _mesa_insert_mvp_code(st->ctx, &stvp->Base);
/*
* Determine number of inputs, the mappings between VERT_ATTRIB_x
@@ -253,7 +279,7 @@ st_prepare_vertex_program(struct gl_context *ctx,
*/
for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
if ((stvp->Base.Base.InputsRead & BITFIELD64_BIT(attr)) != 0) {
- stvp->input_to_index[attr] = stvp->num_inputs;
+ input_to_index[attr] = stvp->num_inputs;
stvp->index_to_input[stvp->num_inputs] = attr;
stvp->num_inputs++;
if ((stvp->Base.Base.DoubleInputsRead & BITFIELD64_BIT(attr)) != 0) {
@@ -264,7 +290,7 @@ st_prepare_vertex_program(struct gl_context *ctx,
}
}
/* bit of a hack, presetup potentially unused edgeflag input */
- stvp->input_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;
+ input_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;
stvp->index_to_input[stvp->num_inputs] = VERT_ATTRIB_EDGEFLAG;
/* Compute mapping of vertex program outputs to slots.
@@ -274,62 +300,62 @@ st_prepare_vertex_program(struct gl_context *ctx,
stvp->result_to_output[attr] = ~0;
}
else {
- unsigned slot = stvp->num_outputs++;
+ unsigned slot = num_outputs++;
stvp->result_to_output[attr] = slot;
- stvp->output_slot_to_attr[slot] = attr;
+ output_slot_to_attr[slot] = attr;
switch (attr) {
case VARYING_SLOT_POS:
- stvp->output_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
- stvp->output_semantic_index[slot] = 0;
+ output_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
+ output_semantic_index[slot] = 0;
break;
case VARYING_SLOT_COL0:
- stvp->output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
- stvp->output_semantic_index[slot] = 0;
+ output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
+ output_semantic_index[slot] = 0;
break;
case VARYING_SLOT_COL1:
- stvp->output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
- stvp->output_semantic_index[slot] = 1;
+ output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
+ output_semantic_index[slot] = 1;
break;
case VARYING_SLOT_BFC0:
- stvp->output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
- stvp->output_semantic_index[slot] = 0;
+ output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
+ output_semantic_index[slot] = 0;
break;
case VARYING_SLOT_BFC1:
- stvp->output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
- stvp->output_semantic_index[slot] = 1;
+ output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
+ output_semantic_index[slot] = 1;
break;
case VARYING_SLOT_FOGC:
- stvp->output_semantic_name[slot] = TGSI_SEMANTIC_FOG;
- stvp->output_semantic_index[slot] = 0;
+ output_semantic_name[slot] = TGSI_SEMANTIC_FOG;
+ output_semantic_index[slot] = 0;
break;
case VARYING_SLOT_PSIZ:
- stvp->output_semantic_name[slot] = TGSI_SEMANTIC_PSIZE;
- stvp->output_semantic_index[slot] = 0;
+ output_semantic_name[slot] = TGSI_SEMANTIC_PSIZE;
+ output_semantic_index[slot] = 0;
break;
case VARYING_SLOT_CLIP_DIST0:
- stvp->output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
- stvp->output_semantic_index[slot] = 0;
+ output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
+ output_semantic_index[slot] = 0;
break;
case VARYING_SLOT_CLIP_DIST1:
- stvp->output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
- stvp->output_semantic_index[slot] = 1;
+ output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
+ output_semantic_index[slot] = 1;
break;
case VARYING_SLOT_EDGE:
assert(0);
break;
case VARYING_SLOT_CLIP_VERTEX:
- stvp->output_semantic_name[slot] = TGSI_SEMANTIC_CLIPVERTEX;
- stvp->output_semantic_index[slot] = 0;
+ output_semantic_name[slot] = TGSI_SEMANTIC_CLIPVERTEX;
+ output_semantic_index[slot] = 0;
break;
case VARYING_SLOT_LAYER:
- stvp->output_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
- stvp->output_semantic_index[slot] = 0;
+ output_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
+ output_semantic_index[slot] = 0;
break;
case VARYING_SLOT_VIEWPORT:
- stvp->output_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
- stvp->output_semantic_index[slot] = 0;
+ output_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
+ output_semantic_index[slot] = 0;
break;
case VARYING_SLOT_TEX0:
@@ -341,8 +367,8 @@ st_prepare_vertex_program(struct gl_context *ctx,
case VARYING_SLOT_TEX6:
case VARYING_SLOT_TEX7:
if (st->needs_texcoord_semantic) {
- stvp->output_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
- stvp->output_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
+ output_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
+ output_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
break;
}
/* fall through */
@@ -350,55 +376,24 @@ st_prepare_vertex_program(struct gl_context *ctx,
default:
assert(attr >= VARYING_SLOT_VAR0 ||
(attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
- stvp->output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
- stvp->output_semantic_index[slot] =
+ output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
+ output_semantic_index[slot] =
st_get_generic_varying_index(st, attr);
break;
}
}
}
/* similar hack to above, presetup potentially unused edgeflag output */
- stvp->result_to_output[VARYING_SLOT_EDGE] = stvp->num_outputs;
- stvp->output_semantic_name[stvp->num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
- stvp->output_semantic_index[stvp->num_outputs] = 0;
-}
-
-
-/**
- * Translate a vertex program to create a new variant.
- */
-static struct st_vp_variant *
-st_translate_vertex_program(struct st_context *st,
- struct st_vertex_program *stvp,
- const struct st_vp_variant_key *key)
-{
- struct st_vp_variant *vpv = CALLOC_STRUCT(st_vp_variant);
- struct pipe_context *pipe = st->pipe;
- struct ureg_program *ureg;
- enum pipe_error error;
- unsigned num_outputs;
-
- st_prepare_vertex_program(st->ctx, stvp);
+ stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs;
+ output_semantic_name[num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
+ output_semantic_index[num_outputs] = 0;
if (!stvp->glsl_to_tgsi)
- {
_mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT);
- }
ureg = ureg_create_with_screen(TGSI_PROCESSOR_VERTEX, st->pipe->screen);
- if (ureg == NULL) {
- free(vpv);
- return NULL;
- }
-
- vpv->key = *key;
-
- vpv->num_inputs = stvp->num_inputs;
- num_outputs = stvp->num_outputs;
- if (key->passthrough_edgeflags) {
- vpv->num_inputs++;
- num_outputs++;
- }
+ if (ureg == NULL)
+ return false;
if (ST_DEBUG & DEBUG_MESA) {
_mesa_print_program(&stvp->Base.Base);
@@ -406,15 +401,15 @@ st_translate_vertex_program(struct st_context *st,
debug_printf("\n");
}
- if (stvp->glsl_to_tgsi)
+ if (stvp->glsl_to_tgsi) {
error = st_translate_program(st->ctx,
TGSI_PROCESSOR_VERTEX,
ureg,
stvp->glsl_to_tgsi,
&stvp->Base.Base,
/* inputs */
- vpv->num_inputs,
- stvp->input_to_index,
+ stvp->num_inputs,
+ input_to_index,
NULL, /* inputSlotToAttr */
NULL, /* input semantic name */
NULL, /* input semantic index */
@@ -423,43 +418,75 @@ st_translate_vertex_program(struct st_context *st,
/* outputs */
num_outputs,
stvp->result_to_output,
- stvp->output_slot_to_attr,
- stvp->output_semantic_name,
- stvp->output_semantic_index,
- key->passthrough_edgeflags,
- key->clamp_color);
- else
+ output_slot_to_attr,
+ output_semantic_name,
+ output_semantic_index);
+
+ st_translate_stream_output_info(stvp->glsl_to_tgsi,
+ stvp->result_to_output,
+ &stvp->tgsi.stream_output);
+
+ free_glsl_to_tgsi_visitor(stvp->glsl_to_tgsi);
+ stvp->glsl_to_tgsi = NULL;
+ } else
error = st_translate_mesa_program(st->ctx,
TGSI_PROCESSOR_VERTEX,
ureg,
&stvp->Base.Base,
/* inputs */
- vpv->num_inputs,
- stvp->input_to_index,
+ stvp->num_inputs,
+ input_to_index,
NULL, /* input semantic name */
NULL, /* input semantic index */
NULL,
/* outputs */
num_outputs,
stvp->result_to_output,
- stvp->output_semantic_name,
- stvp->output_semantic_index,
- key->passthrough_edgeflags,
- key->clamp_color);
+ output_semantic_name,
+ output_semantic_index);
+
+ if (error) {
+ debug_printf("%s: failed to translate Mesa program:\n", __func__);
+ _mesa_print_program(&stvp->Base.Base);
+ debug_assert(0);
+ return false;
+ }
+
+ stvp->tgsi.tokens = ureg_get_tokens(ureg, NULL);
+ ureg_destroy(ureg);
+ return stvp->tgsi.tokens != NULL;
+}
- if (error)
- goto fail;
+static struct st_vp_variant *
+st_create_vp_variant(struct st_context *st,
+ struct st_vertex_program *stvp,
+ const struct st_vp_variant_key *key)
+{
+ struct st_vp_variant *vpv = CALLOC_STRUCT(st_vp_variant);
+ struct pipe_context *pipe = st->pipe;
- vpv->tgsi.tokens = ureg_get_tokens( ureg, NULL );
- if (!vpv->tgsi.tokens)
- goto fail;
+ vpv->key = *key;
+ vpv->tgsi.tokens = tgsi_dup_tokens(stvp->tgsi.tokens);
+ vpv->tgsi.stream_output = stvp->tgsi.stream_output;
+ vpv->num_inputs = stvp->num_inputs;
- ureg_destroy( ureg );
+ /* Emulate features. */
+ if (key->clamp_color || key->passthrough_edgeflags) {
+ const struct tgsi_token *tokens;
+ unsigned flags =
+ (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
+ (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);
- if (stvp->glsl_to_tgsi) {
- st_translate_stream_output_info(stvp->glsl_to_tgsi,
- stvp->result_to_output,
- &vpv->tgsi.stream_output);
+ tokens = tgsi_emulate(vpv->tgsi.tokens, flags);
+
+ if (tokens) {
+ tgsi_free_tokens(vpv->tgsi.tokens);
+ vpv->tgsi.tokens = tokens;
+
+ if (key->passthrough_edgeflags)
+ vpv->num_inputs++;
+ } else
+ fprintf(stderr, "mesa: cannot emulate deprecated features\n");
}
if (ST_DEBUG & DEBUG_TGSI) {
@@ -469,14 +496,6 @@ st_translate_vertex_program(struct st_context *st,
vpv->driver_shader = pipe->create_vs_state(pipe, &vpv->tgsi);
return vpv;
-
-fail:
- debug_printf("%s: failed to translate Mesa program:\n", __func__);
- _mesa_print_program(&stvp->Base.Base);
- debug_assert(0);
-
- ureg_destroy( ureg );
- return NULL;
}
@@ -499,7 +518,7 @@ st_get_vp_variant(struct st_context *st,
if (!vpv) {
/* create now */
- vpv = st_translate_vertex_program(st, stvp, key);
+ vpv = st_create_vp_variant(st, stvp, key);
if (vpv) {
/* insert into list */
vpv->next = stvp->variants;
@@ -533,19 +552,12 @@ st_translate_interp(enum glsl_interp_qualifier glsl_qual, bool is_color)
/**
- * Translate a Mesa fragment shader into a TGSI shader using extra info in
- * the key.
- * \return new fragment program variant
+ * Translate a Mesa fragment shader into a TGSI shader.
*/
-static struct st_fp_variant *
+bool
st_translate_fragment_program(struct st_context *st,
- struct st_fragment_program *stfp,
- const struct st_fp_variant_key *key)
+ struct st_fragment_program *stfp)
{
- struct pipe_context *pipe = st->pipe;
- struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
- GLboolean deleteFP = GL_FALSE;
-
GLuint outputMapping[FRAG_RESULT_MAX];
GLuint inputMapping[VARYING_SLOT_MAX];
GLuint inputSlotToAttr[VARYING_SLOT_MAX];
@@ -565,40 +577,8 @@ st_translate_fragment_program(struct st_context *st,
ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
uint fs_num_outputs = 0;
- if (!variant)
- return NULL;
-
- assert(!(key->bitmap && key->drawpixels));
memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));
- if (key->bitmap) {
- /* glBitmap drawing */
- struct gl_fragment_program *fp; /* we free this temp program below */
-
- st_make_bitmap_fragment_program(st, &stfp->Base,
- &fp, &variant->bitmap_sampler);
-
- variant->parameters = _mesa_clone_parameter_list(fp->Base.Parameters);
- stfp = st_fragment_program(fp);
- deleteFP = GL_TRUE;
- }
- else if (key->drawpixels) {
- /* glDrawPixels drawing */
- struct gl_fragment_program *fp; /* we free this temp program below */
-
- if (key->drawpixels_z || key->drawpixels_stencil) {
- fp = st_make_drawpix_z_stencil_program(st, key->drawpixels_z,
- key->drawpixels_stencil);
- }
- else {
- /* RGBA */
- st_make_drawpix_fragment_program(st, &stfp->Base, &fp);
- variant->parameters = _mesa_clone_parameter_list(fp->Base.Parameters);
- deleteFP = GL_TRUE;
- }
- stfp = st_fragment_program(fp);
- }
-
if (!stfp->glsl_to_tgsi)
_mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT);
@@ -620,8 +600,7 @@ st_translate_fragment_program(struct st_context *st,
interpLocation[slot] = TGSI_INTERPOLATE_LOC_CENTER;
if (stfp->Base.Base.SystemValuesRead & (SYSTEM_BIT_SAMPLE_ID |
- SYSTEM_BIT_SAMPLE_POS) ||
- key->persample_shading)
+ SYSTEM_BIT_SAMPLE_POS))
interpLocation[slot] = TGSI_INTERPOLATE_LOC_SAMPLE;
switch (attr) {
@@ -805,10 +784,8 @@ st_translate_fragment_program(struct st_context *st,
}
ureg = ureg_create_with_screen(TGSI_PROCESSOR_FRAGMENT, st->pipe->screen);
- if (ureg == NULL) {
- free(variant);
- return NULL;
- }
+ if (ureg == NULL)
+ return false;
if (ST_DEBUG & DEBUG_MESA) {
_mesa_print_program(&stfp->Base.Base);
@@ -841,7 +818,7 @@ st_translate_fragment_program(struct st_context *st,
}
}
- if (stfp->glsl_to_tgsi)
+ if (stfp->glsl_to_tgsi) {
st_translate_program(st->ctx,
TGSI_PROCESSOR_FRAGMENT,
ureg,
@@ -860,9 +837,11 @@ st_translate_fragment_program(struct st_context *st,
outputMapping,
NULL,
fs_output_semantic_name,
- fs_output_semantic_index, FALSE,
- key->clamp_color );
- else
+ fs_output_semantic_index);
+
+ free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
+ stfp->glsl_to_tgsi = NULL;
+ } else
st_translate_mesa_program(st->ctx,
TGSI_PROCESSOR_FRAGMENT,
ureg,
@@ -877,31 +856,134 @@ st_translate_fragment_program(struct st_context *st,
fs_num_outputs,
outputMapping,
fs_output_semantic_name,
- fs_output_semantic_index, FALSE,
- key->clamp_color);
+ fs_output_semantic_index);
+
+ stfp->tgsi.tokens = ureg_get_tokens(ureg, NULL);
+ ureg_destroy(ureg);
+ return stfp->tgsi.tokens != NULL;
+}
+
+static struct st_fp_variant *
+st_create_fp_variant(struct st_context *st,
+ struct st_fragment_program *stfp,
+ const struct st_fp_variant_key *key)
+{
+ struct pipe_context *pipe = st->pipe;
+ struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
+ struct pipe_shader_state tgsi = {0};
+
+ if (!variant)
+ return NULL;
+
+ tgsi.tokens = stfp->tgsi.tokens;
- variant->tgsi.tokens = ureg_get_tokens( ureg, NULL );
- ureg_destroy( ureg );
+ assert(!(key->bitmap && key->drawpixels));
+
+ /* Emulate features. */
+ if (key->clamp_color || key->persample_shading) {
+ const struct tgsi_token *tokens;
+ unsigned flags =
+ (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
+ (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);
+
+ tokens = tgsi_emulate(tgsi.tokens, flags);
+
+ if (tokens)
+ tgsi.tokens = tokens;
+ else
+ fprintf(stderr, "mesa: cannot emulate deprecated features\n");
+ }
+
+ /* glBitmap */
+ if (key->bitmap) {
+ const struct tgsi_token *tokens;
+
+ variant->bitmap_sampler = ffs(~stfp->Base.Base.SamplersUsed) - 1;
+
+ tokens = st_get_bitmap_shader(tgsi.tokens,
+ variant->bitmap_sampler,
+ st->needs_texcoord_semantic,
+ st->bitmap.tex_format ==
+ PIPE_FORMAT_L8_UNORM);
+
+ if (tokens) {
+ if (tgsi.tokens != stfp->tgsi.tokens)
+ tgsi_free_tokens(tgsi.tokens);
+ tgsi.tokens = tokens;
+ variant->parameters =
+ _mesa_clone_parameter_list(stfp->Base.Base.Parameters);
+ } else
+ fprintf(stderr, "mesa: cannot create a shader for glBitmap\n");
+ }
+
+ /* glDrawPixels (color only) */
+ if (key->drawpixels) {
+ const struct tgsi_token *tokens;
+ unsigned scale_const = 0, bias_const = 0, texcoord_const = 0;
+
+ /* Find the first unused slot. */
+ variant->drawpix_sampler = ffs(~stfp->Base.Base.SamplersUsed) - 1;
+
+ if (key->pixelMaps) {
+ unsigned samplers_used = stfp->Base.Base.SamplersUsed |
+ (1 << variant->drawpix_sampler);
+
+ variant->pixelmap_sampler = ffs(~samplers_used) - 1;
+ }
+
+ variant->parameters =
+ _mesa_clone_parameter_list(stfp->Base.Base.Parameters);
+
+ if (key->scaleAndBias) {
+ static const gl_state_index scale_state[STATE_LENGTH] =
+ { STATE_INTERNAL, STATE_PT_SCALE };
+ static const gl_state_index bias_state[STATE_LENGTH] =
+ { STATE_INTERNAL, STATE_PT_BIAS };
+
+ scale_const = _mesa_add_state_reference(variant->parameters,
+ scale_state);
+ bias_const = _mesa_add_state_reference(variant->parameters,
+ bias_state);
+ }
+
+ {
+ static const gl_state_index state[STATE_LENGTH] =
+ { STATE_INTERNAL, STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
+
+ texcoord_const = _mesa_add_state_reference(variant->parameters,
+ state);
+ }
+
+ tokens = st_get_drawpix_shader(tgsi.tokens,
+ st->needs_texcoord_semantic,
+ key->scaleAndBias, scale_const,
+ bias_const, key->pixelMaps,
+ variant->drawpix_sampler,
+ variant->pixelmap_sampler,
+ texcoord_const);
+
+ if (tokens) {
+ if (tgsi.tokens != stfp->tgsi.tokens)
+ tgsi_free_tokens(tgsi.tokens);
+ tgsi.tokens = tokens;
+ } else
+ fprintf(stderr, "mesa: cannot create a shader for glDrawPixels\n");
+ }
if (ST_DEBUG & DEBUG_TGSI) {
- tgsi_dump(variant->tgsi.tokens, 0/*TGSI_DUMP_VERBOSE*/);
+ tgsi_dump(tgsi.tokens, 0);
debug_printf("\n");
}
/* fill in variant */
- variant->driver_shader = pipe->create_fs_state(pipe, &variant->tgsi);
+ variant->driver_shader = pipe->create_fs_state(pipe, &tgsi);
variant->key = *key;
- if (deleteFP) {
- /* Free the temporary program made above */
- struct gl_fragment_program *fp = &stfp->Base;
- _mesa_reference_fragprog(st->ctx, &fp, NULL);
- }
-
+ if (tgsi.tokens != stfp->tgsi.tokens)
+ tgsi_free_tokens(tgsi.tokens);
return variant;
}
-
/**
* Translate fragment program if needed.
*/
@@ -921,7 +1003,7 @@ st_get_fp_variant(struct st_context *st,
if (!fpv) {
/* create new */
- fpv = st_translate_fragment_program(st, stfp, key);
+ fpv = st_create_fp_variant(st, stfp, key);
if (fpv) {
/* insert into list */
fpv->next = stfp->variants;
@@ -1191,9 +1273,7 @@ st_translate_program_common(struct st_context *st,
outputMapping,
outputSlotToAttr,
output_semantic_name,
- output_semantic_index,
- FALSE,
- FALSE);
+ output_semantic_index);
out_state->tokens = ureg_get_tokens(ureg, NULL);
ureg_destroy(ureg);
@@ -1217,19 +1297,15 @@ st_translate_program_common(struct st_context *st,
/**
* Translate a geometry program to create a new variant.
*/
-static struct st_gp_variant *
+bool
st_translate_geometry_program(struct st_context *st,
- struct st_geometry_program *stgp,
- const struct st_gp_variant_key *key)
+ struct st_geometry_program *stgp)
{
- struct pipe_context *pipe = st->pipe;
struct ureg_program *ureg;
- struct st_gp_variant *gpv;
- struct pipe_shader_state state;
ureg = ureg_create_with_screen(TGSI_PROCESSOR_GEOMETRY, st->pipe->screen);
if (ureg == NULL)
- return NULL;
+ return false;
ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM, stgp->Base.InputType);
ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM, stgp->Base.OutputType);
@@ -1238,19 +1314,29 @@ st_translate_geometry_program(struct st_context *st,
ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS, stgp->Base.Invocations);
st_translate_program_common(st, &stgp->Base.Base, stgp->glsl_to_tgsi, ureg,
- TGSI_PROCESSOR_GEOMETRY, &state);
+ TGSI_PROCESSOR_GEOMETRY, &stgp->tgsi);
+
+ free_glsl_to_tgsi_visitor(stgp->glsl_to_tgsi);
+ stgp->glsl_to_tgsi = NULL;
+ return true;
+}
+
+
+static struct st_gp_variant *
+st_create_gp_variant(struct st_context *st,
+ struct st_geometry_program *stgp,
+ const struct st_gp_variant_key *key)
+{
+ struct pipe_context *pipe = st->pipe;
+ struct st_gp_variant *gpv;
gpv = CALLOC_STRUCT(st_gp_variant);
- if (!gpv) {
- ureg_free_tokens(state.tokens);
+ if (!gpv)
return NULL;
- }
/* fill in new variant */
- gpv->driver_shader = pipe->create_gs_state(pipe, &state);
+ gpv->driver_shader = pipe->create_gs_state(pipe, &stgp->tgsi);
gpv->key = *key;
-
- ureg_free_tokens(state.tokens);
return gpv;
}
@@ -1274,7 +1360,7 @@ st_get_gp_variant(struct st_context *st,
if (!gpv) {
/* create new */
- gpv = st_translate_geometry_program(st, stgp, key);
+ gpv = st_create_gp_variant(st, stgp, key);
if (gpv) {
/* insert into list */
gpv->next = stgp->variants;
@@ -1289,38 +1375,43 @@ st_get_gp_variant(struct st_context *st,
/**
* Translate a tessellation control program to create a new variant.
*/
-static struct st_tcp_variant *
+bool
st_translate_tessctrl_program(struct st_context *st,
- struct st_tessctrl_program *sttcp,
- const struct st_tcp_variant_key *key)
+ struct st_tessctrl_program *sttcp)
{
- struct pipe_context *pipe = st->pipe;
struct ureg_program *ureg;
- struct st_tcp_variant *tcpv;
- struct pipe_shader_state state;
- ureg = ureg_create_with_screen(TGSI_PROCESSOR_TESS_CTRL, pipe->screen);
- if (ureg == NULL) {
- return NULL;
- }
+ ureg = ureg_create_with_screen(TGSI_PROCESSOR_TESS_CTRL, st->pipe->screen);
+ if (ureg == NULL)
+ return false;
ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT,
sttcp->Base.VerticesOut);
st_translate_program_common(st, &sttcp->Base.Base, sttcp->glsl_to_tgsi,
- ureg, TGSI_PROCESSOR_TESS_CTRL, &state);
+ ureg, TGSI_PROCESSOR_TESS_CTRL, &sttcp->tgsi);
+
+ free_glsl_to_tgsi_visitor(sttcp->glsl_to_tgsi);
+ sttcp->glsl_to_tgsi = NULL;
+ return true;
+}
+
+
+static struct st_tcp_variant *
+st_create_tcp_variant(struct st_context *st,
+ struct st_tessctrl_program *sttcp,
+ const struct st_tcp_variant_key *key)
+{
+ struct pipe_context *pipe = st->pipe;
+ struct st_tcp_variant *tcpv;
tcpv = CALLOC_STRUCT(st_tcp_variant);
- if (!tcpv) {
- ureg_free_tokens(state.tokens);
+ if (!tcpv)
return NULL;
- }
/* fill in new variant */
- tcpv->driver_shader = pipe->create_tcs_state(pipe, &state);
+ tcpv->driver_shader = pipe->create_tcs_state(pipe, &sttcp->tgsi);
tcpv->key = *key;
-
- ureg_free_tokens(state.tokens);
return tcpv;
}
@@ -1344,7 +1435,7 @@ st_get_tcp_variant(struct st_context *st,
if (!tcpv) {
/* create new */
- tcpv = st_translate_tessctrl_program(st, sttcp, key);
+ tcpv = st_create_tcp_variant(st, sttcp, key);
if (tcpv) {
/* insert into list */
tcpv->next = sttcp->variants;
@@ -1359,20 +1450,15 @@ st_get_tcp_variant(struct st_context *st,
/**
* Translate a tessellation evaluation program to create a new variant.
*/
-static struct st_tep_variant *
+bool
st_translate_tesseval_program(struct st_context *st,
- struct st_tesseval_program *sttep,
- const struct st_tep_variant_key *key)
+ struct st_tesseval_program *sttep)
{
- struct pipe_context *pipe = st->pipe;
struct ureg_program *ureg;
- struct st_tep_variant *tepv;
- struct pipe_shader_state state;
- ureg = ureg_create_with_screen(TGSI_PROCESSOR_TESS_EVAL, pipe->screen);
- if (ureg == NULL) {
- return NULL;
- }
+ ureg = ureg_create_with_screen(TGSI_PROCESSOR_TESS_EVAL, st->pipe->screen);
+ if (ureg == NULL)
+ return false;
if (sttep->Base.PrimitiveMode == GL_ISOLINES)
ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, GL_LINES);
@@ -1400,19 +1486,29 @@ st_translate_tesseval_program(struct st_context *st,
ureg_property(ureg, TGSI_PROPERTY_TES_POINT_MODE, sttep->Base.PointMode);
st_translate_program_common(st, &sttep->Base.Base, sttep->glsl_to_tgsi,
- ureg, TGSI_PROCESSOR_TESS_EVAL, &state);
+ ureg, TGSI_PROCESSOR_TESS_EVAL, &sttep->tgsi);
+
+ free_glsl_to_tgsi_visitor(sttep->glsl_to_tgsi);
+ sttep->glsl_to_tgsi = NULL;
+ return true;
+}
+
+
+static struct st_tep_variant *
+st_create_tep_variant(struct st_context *st,
+ struct st_tesseval_program *sttep,
+ const struct st_tep_variant_key *key)
+{
+ struct pipe_context *pipe = st->pipe;
+ struct st_tep_variant *tepv;
tepv = CALLOC_STRUCT(st_tep_variant);
- if (!tepv) {
- ureg_free_tokens(state.tokens);
+ if (!tepv)
return NULL;
- }
/* fill in new variant */
- tepv->driver_shader = pipe->create_tes_state(pipe, &state);
+ tepv->driver_shader = pipe->create_tes_state(pipe, &sttep->tgsi);
tepv->key = *key;
-
- ureg_free_tokens(state.tokens);
return tepv;
}
@@ -1436,7 +1532,7 @@ st_get_tep_variant(struct st_context *st,
if (!tepv) {
/* create new */
- tepv = st_translate_tesseval_program(st, sttep, key);
+ tepv = st_create_tep_variant(st, sttep, key);
if (tepv) {
/* insert into list */
tepv->next = sttep->variants;
diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h
index 7013993fe38..d9b53ac008c 100644
--- a/src/mesa/state_tracker/st_program.h
+++ b/src/mesa/state_tracker/st_program.h
@@ -59,8 +59,6 @@ struct st_fp_variant_key
GLuint drawpixels:1; /**< glDrawPixels variant */
GLuint scaleAndBias:1; /**< glDrawPixels w/ scale and/or bias? */
GLuint pixelMaps:1; /**< glDrawPixels w/ pixel lookup map? */
- GLuint drawpixels_z:1; /**< glDrawPixels(GL_DEPTH) */
- GLuint drawpixels_stencil:1; /**< glDrawPixels(GL_STENCIL) */
/** for ARB_color_buffer_float */
GLuint clamp_color:1;
@@ -78,8 +76,6 @@ struct st_fp_variant
/** Parameters which generated this version of fragment program */
struct st_fp_variant_key key;
- struct pipe_shader_state tgsi;
-
/** Driver's compiled shader */
void *driver_shader;
@@ -87,6 +83,10 @@ struct st_fp_variant
struct gl_program_parameter_list *parameters;
uint bitmap_sampler;
+ /** For glDrawPixels variants */
+ unsigned drawpix_sampler;
+ unsigned pixelmap_sampler;
+
/** next in linked list */
struct st_fp_variant *next;
};
@@ -98,6 +98,7 @@ struct st_fp_variant
struct st_fragment_program
{
struct gl_fragment_program Base;
+ struct pipe_shader_state tgsi;
struct glsl_to_tgsi_visitor* glsl_to_tgsi;
struct st_fp_variant *variants;
@@ -153,20 +154,16 @@ struct st_vp_variant
struct st_vertex_program
{
struct gl_vertex_program Base; /**< The Mesa vertex program */
+ struct pipe_shader_state tgsi;
struct glsl_to_tgsi_visitor* glsl_to_tgsi;
/** maps a Mesa VERT_ATTRIB_x to a packed TGSI input index */
- GLuint input_to_index[VERT_ATTRIB_MAX];
/** maps a TGSI input index back to a Mesa VERT_ATTRIB_x */
GLuint index_to_input[PIPE_MAX_SHADER_INPUTS];
GLuint num_inputs;
/** Maps VARYING_SLOT_x to slot */
GLuint result_to_output[VARYING_SLOT_MAX];
- GLuint output_slot_to_attr[VARYING_SLOT_MAX];
- ubyte output_semantic_name[VARYING_SLOT_MAX];
- ubyte output_semantic_index[VARYING_SLOT_MAX];
- GLuint num_outputs;
/** List of translated variants of this vertex program.
*/
@@ -203,6 +200,7 @@ struct st_gp_variant
struct st_geometry_program
{
struct gl_geometry_program Base; /**< The Mesa geometry program */
+ struct pipe_shader_state tgsi;
struct glsl_to_tgsi_visitor* glsl_to_tgsi;
struct st_gp_variant *variants;
@@ -238,6 +236,7 @@ struct st_tcp_variant
struct st_tessctrl_program
{
struct gl_tess_ctrl_program Base; /**< The Mesa tess ctrl program */
+ struct pipe_shader_state tgsi;
struct glsl_to_tgsi_visitor* glsl_to_tgsi;
struct st_tcp_variant *variants;
@@ -273,6 +272,7 @@ struct st_tep_variant
struct st_tesseval_program
{
struct gl_tess_eval_program Base; /**< The Mesa tess eval program */
+ struct pipe_shader_state tgsi;
struct glsl_to_tgsi_visitor* glsl_to_tgsi;
struct st_tep_variant *variants;
@@ -414,16 +414,6 @@ st_get_tep_variant(struct st_context *st,
struct st_tesseval_program *stgp,
const struct st_tep_variant_key *key);
-
-extern void
-st_prepare_vertex_program(struct gl_context *ctx,
- struct st_vertex_program *stvp);
-
-extern GLboolean
-st_prepare_fragment_program(struct gl_context *ctx,
- struct st_fragment_program *stfp);
-
-
extern void
st_release_vp_variants( struct st_context *st,
struct st_vertex_program *stvp );
@@ -447,6 +437,25 @@ st_release_tep_variants(struct st_context *st,
extern void
st_destroy_program_variants(struct st_context *st);
+extern bool
+st_translate_vertex_program(struct st_context *st,
+ struct st_vertex_program *stvp);
+
+extern bool
+st_translate_fragment_program(struct st_context *st,
+ struct st_fragment_program *stfp);
+
+extern bool
+st_translate_geometry_program(struct st_context *st,
+ struct st_geometry_program *stgp);
+
+extern bool
+st_translate_tessctrl_program(struct st_context *st,
+ struct st_tessctrl_program *sttcp);
+
+extern bool
+st_translate_tesseval_program(struct st_context *st,
+ struct st_tesseval_program *sttep);
extern void
st_print_current_vertex_program(void);
diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c
index c130ab3f93d..6f29abbe1ba 100644
--- a/src/mesa/tnl/t_draw.c
+++ b/src/mesa/tnl/t_draw.c
@@ -35,6 +35,7 @@
#include "main/mtypes.h"
#include "main/macros.h"
#include "main/enums.h"
+#include "util/half_float.h"
#include "t_context.h"
#include "tnl.h"
diff --git a/src/mesa/vbo/vbo_context.c b/src/mesa/vbo/vbo_context.c
index e3eb286e482..5e1a760eb2c 100644
--- a/src/mesa/vbo/vbo_context.c
+++ b/src/mesa/vbo/vbo_context.c
@@ -33,7 +33,6 @@
#include "vbo.h"
#include "vbo_context.h"
-#define NR_MAT_ATTRIBS 12
static GLuint check_size( const GLfloat *attr )
{
@@ -44,32 +43,47 @@ static GLuint check_size( const GLfloat *attr )
}
+/**
+ * Helper for initializing a vertex array.
+ */
+static void
+init_array(struct gl_context *ctx, struct gl_client_array *cl,
+ unsigned size, const void *pointer)
+{
+ memset(cl, 0, sizeof(*cl));
+
+ cl->Size = size;
+ cl->Type = GL_FLOAT;
+ cl->Format = GL_RGBA;
+ cl->Stride = 0;
+ cl->StrideB = 0;
+ cl->_ElementSize = cl->Size * sizeof(GLfloat);
+ cl->Ptr = pointer;
+ cl->Enabled = 1;
+
+ _mesa_reference_buffer_object(ctx, &cl->BufferObj,
+ ctx->Shared->NullBufferObj);
+}
+
+
+/**
+ * Set up the vbo->currval arrays to point at the context's current
+ * vertex attributes (with strides = 0).
+ */
static void init_legacy_currval(struct gl_context *ctx)
{
struct vbo_context *vbo = vbo_context(ctx);
- struct gl_client_array *arrays = &vbo->currval[VBO_ATTRIB_POS];
GLuint i;
- memset(arrays, 0, sizeof(*arrays) * VERT_ATTRIB_FF_MAX);
-
/* Set up a constant (StrideB == 0) array for each current
* attribute:
*/
for (i = 0; i < VERT_ATTRIB_FF_MAX; i++) {
- struct gl_client_array *cl = &arrays[i];
+ struct gl_client_array *cl = &vbo->currval[VERT_ATTRIB_FF(i)];
- /* Size will have to be determined at runtime:
- */
- cl->Size = check_size(ctx->Current.Attrib[i]);
- cl->Stride = 0;
- cl->StrideB = 0;
- cl->Enabled = 1;
- cl->Type = GL_FLOAT;
- cl->Format = GL_RGBA;
- cl->Ptr = (const void *)ctx->Current.Attrib[i];
- cl->_ElementSize = cl->Size * sizeof(GLfloat);
- _mesa_reference_buffer_object(ctx, &cl->BufferObj,
- ctx->Shared->NullBufferObj);
+ init_array(ctx, cl,
+ check_size(ctx->Current.Attrib[i]),
+ ctx->Current.Attrib[i]);
}
}
@@ -77,26 +91,12 @@ static void init_legacy_currval(struct gl_context *ctx)
static void init_generic_currval(struct gl_context *ctx)
{
struct vbo_context *vbo = vbo_context(ctx);
- struct gl_client_array *arrays = &vbo->currval[VBO_ATTRIB_GENERIC0];
GLuint i;
- memset(arrays, 0, sizeof(*arrays) * VERT_ATTRIB_GENERIC_MAX);
-
for (i = 0; i < VERT_ATTRIB_GENERIC_MAX; i++) {
- struct gl_client_array *cl = &arrays[i];
+ struct gl_client_array *cl = &vbo->currval[VBO_ATTRIB_GENERIC0 + i];
- /* This will have to be determined at runtime:
- */
- cl->Size = 1;
- cl->Type = GL_FLOAT;
- cl->Format = GL_RGBA;
- cl->Ptr = (const void *)ctx->Current.Attrib[VERT_ATTRIB_GENERIC0 + i];
- cl->Stride = 0;
- cl->StrideB = 0;
- cl->Enabled = 1;
- cl->_ElementSize = cl->Size * sizeof(GLfloat);
- _mesa_reference_buffer_object(ctx, &cl->BufferObj,
- ctx->Shared->NullBufferObj);
+ init_array(ctx, cl, 1, ctx->Current.Attrib[VERT_ATTRIB_GENERIC0 + i]);
}
}
@@ -104,46 +104,34 @@ static void init_generic_currval(struct gl_context *ctx)
static void init_mat_currval(struct gl_context *ctx)
{
struct vbo_context *vbo = vbo_context(ctx);
- struct gl_client_array *arrays =
- &vbo->currval[VBO_ATTRIB_MAT_FRONT_AMBIENT];
GLuint i;
- assert(NR_MAT_ATTRIBS == MAT_ATTRIB_MAX);
-
- memset(arrays, 0, sizeof(*arrays) * NR_MAT_ATTRIBS);
-
/* Set up a constant (StrideB == 0) array for each current
* attribute:
*/
- for (i = 0; i < NR_MAT_ATTRIBS; i++) {
- struct gl_client_array *cl = &arrays[i];
+ for (i = 0; i < MAT_ATTRIB_MAX; i++) {
+ struct gl_client_array *cl =
+ &vbo->currval[VBO_ATTRIB_MAT_FRONT_AMBIENT + i];
+ unsigned size;
/* Size is fixed for the material attributes, for others will
* be determined at runtime:
*/
- switch (i - VERT_ATTRIB_GENERIC0) {
+ switch (i) {
case MAT_ATTRIB_FRONT_SHININESS:
case MAT_ATTRIB_BACK_SHININESS:
- cl->Size = 1;
- break;
+ size = 1;
+ break;
case MAT_ATTRIB_FRONT_INDEXES:
case MAT_ATTRIB_BACK_INDEXES:
- cl->Size = 3;
- break;
+ size = 3;
+ break;
default:
- cl->Size = 4;
- break;
+ size = 4;
+ break;
}
- cl->Ptr = (const void *)ctx->Light.Material.Attrib[i];
- cl->Type = GL_FLOAT;
- cl->Format = GL_RGBA;
- cl->Stride = 0;
- cl->StrideB = 0;
- cl->Enabled = 1;
- cl->_ElementSize = cl->Size * sizeof(GLfloat);
- _mesa_reference_buffer_object(ctx, &cl->BufferObj,
- ctx->Shared->NullBufferObj);
+ init_array(ctx, cl, size, ctx->Light.Material.Attrib[i]);
}
}
@@ -175,7 +163,7 @@ GLboolean _vbo_CreateContext( struct gl_context *ctx )
for (i = 0; i < ARRAY_SIZE(vbo->map_vp_none); i++)
vbo->map_vp_none[i] = i;
/* map material attribs to generic slots */
- for (i = 0; i < NR_MAT_ATTRIBS; i++)
+ for (i = 0; i < MAT_ATTRIB_MAX; i++)
vbo->map_vp_none[VERT_ATTRIB_GENERIC(i)]
= VBO_ATTRIB_MAT_FRONT_AMBIENT + i;
diff --git a/src/mesa/vbo/vbo_exec.h b/src/mesa/vbo/vbo_exec.h
index 80f3015925d..00378eb7984 100644
--- a/src/mesa/vbo/vbo_exec.h
+++ b/src/mesa/vbo/vbo_exec.h
@@ -79,7 +79,7 @@ struct vbo_exec_copied_vtx {
struct vbo_exec_context
{
- struct gl_context *ctx;
+ struct gl_context *ctx;
GLvertexformat vtxfmt;
GLvertexformat vtxfmt_noop;
GLboolean validating; /**< if we're in the middle of state validation */
@@ -97,15 +97,17 @@ struct vbo_exec_context
GLuint buffer_used; /* in bytes */
fi_type vertex[VBO_ATTRIB_MAX*4]; /* current vertex */
- GLuint vert_count;
- GLuint max_vert;
+ GLuint vert_count; /**< Number of vertices currently in buffer */
+ GLuint max_vert; /**< Max number of vertices allowed in buffer */
struct vbo_exec_copied_vtx copied;
- GLubyte attrsz[VBO_ATTRIB_MAX];
- GLenum attrtype[VBO_ATTRIB_MAX];
- GLubyte active_sz[VBO_ATTRIB_MAX];
+ GLubyte attrsz[VBO_ATTRIB_MAX]; /**< nr. of attrib components (1..4) */
+ GLenum attrtype[VBO_ATTRIB_MAX]; /**< GL_FLOAT, GL_DOUBLE, GL_INT, etc */
+ GLubyte active_sz[VBO_ATTRIB_MAX]; /**< attrib size (nr. 32-bit words) */
+ /** pointers into the current 'vertex' array, declared above */
fi_type *attrptr[VBO_ATTRIB_MAX];
+
struct gl_client_array arrays[VERT_ATTRIB_MAX];
/* According to program mode, the values above plus current
@@ -115,7 +117,6 @@ struct vbo_exec_context
const struct gl_client_array *inputs[VERT_ATTRIB_MAX];
} vtx;
-
struct {
GLboolean recalculate_maps;
struct vbo_exec_eval1_map map1[VERT_ATTRIB_MAX];
@@ -131,7 +132,7 @@ struct vbo_exec_context
GLboolean recalculate_inputs;
} array;
- /* Which flags to set in vbo_exec_BeginVertices() */
+ /* Which flags to set in vbo_exec_begin_vertices() */
GLbitfield begin_vertices_flags;
#ifdef DEBUG
@@ -147,8 +148,6 @@ void vbo_exec_init( struct gl_context *ctx );
void vbo_exec_destroy( struct gl_context *ctx );
void vbo_exec_invalidate_state( struct gl_context *ctx, GLuint new_state );
-void vbo_exec_BeginVertices( struct gl_context *ctx );
-
/* Internal functions:
*/
diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c
index 583a2f9b79f..7ae08fe3062 100644
--- a/src/mesa/vbo/vbo_exec_api.c
+++ b/src/mesa/vbo/vbo_exec_api.c
@@ -375,13 +375,16 @@ vbo_exec_wrap_upgrade_vertex(struct vbo_exec_context *exec,
* This is when a vertex attribute transitions to a different size.
* For example, we saw a bunch of glTexCoord2f() calls and now we got a
* glTexCoord4f() call. We promote the array from size=2 to size=4.
+ * \param newSize size of new vertex (number of 32-bit words).
*/
static void
-vbo_exec_fixup_vertex(struct gl_context *ctx, GLuint attr, GLuint newSize, GLenum newType)
+vbo_exec_fixup_vertex(struct gl_context *ctx, GLuint attr,
+ GLuint newSize, GLenum newType)
{
struct vbo_exec_context *exec = &vbo_context(ctx)->exec;
- if (newSize > exec->vtx.attrsz[attr] || newType != exec->vtx.attrtype[attr]) {
+ if (newSize > exec->vtx.attrsz[attr] ||
+ newType != exec->vtx.attrtype[attr]) {
/* New size is larger. Need to flush existing vertices and get
* an enlarged vertex format.
*/
@@ -411,20 +414,49 @@ vbo_exec_fixup_vertex(struct gl_context *ctx, GLuint attr, GLuint newSize, GLenu
/**
+ * Called upon first glVertex, glColor, glTexCoord, etc.
+ */
+static void
+vbo_exec_begin_vertices(struct gl_context *ctx)
+{
+ struct vbo_exec_context *exec = &vbo_context(ctx)->exec;
+
+ vbo_exec_vtx_map( exec );
+
+ assert((ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT) == 0);
+ assert(exec->begin_vertices_flags);
+
+ ctx->Driver.NeedFlush |= exec->begin_vertices_flags;
+}
+
+
+/**
* This macro is used to implement all the glVertex, glColor, glTexCoord,
* glVertexAttrib, etc functions.
+ * \param A attribute index
+ * \param N attribute size (1..4)
+ * \param T type (GL_FLOAT, GL_DOUBLE, GL_INT, GL_UNSIGNED_INT)
+ * \param C cast type (fi_type or double)
+ * \param V0, V1, v2, V3 attribute value
*/
#define ATTR_UNION( A, N, T, C, V0, V1, V2, V3 ) \
do { \
struct vbo_exec_context *exec = &vbo_context(ctx)->exec; \
int sz = (sizeof(C) / sizeof(GLfloat)); \
- if (unlikely(!(ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT))) \
- vbo_exec_BeginVertices(ctx); \
\
+ assert(sz == 1 || sz == 2); \
+ \
+ if (unlikely(!(ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT))) { \
+ vbo_exec_begin_vertices(ctx); \
+ } \
+ \
+ /* check if attribute size or type is changing */ \
if (unlikely(exec->vtx.active_sz[A] != N * sz) || \
- unlikely(exec->vtx.attrtype[A] != T)) \
+ unlikely(exec->vtx.attrtype[A] != T)) { \
vbo_exec_fixup_vertex(ctx, A, N * sz, T); \
+ } \
\
+ /* store vertex attribute in vertex buffer */ \
{ \
C *dest = (C *)exec->vtx.attrptr[A]; \
if (N>0) dest[0] = V0; \
@@ -438,6 +470,7 @@ do { \
/* This is a glVertex call */ \
GLuint i; \
\
+ /* copy 32-bit words */ \
for (i = 0; i < exec->vtx.vertex_size; i++) \
exec->vtx.buffer_ptr[i] = exec->vtx.vertex[i]; \
\
@@ -1149,22 +1182,6 @@ void vbo_exec_vtx_destroy( struct vbo_exec_context *exec )
/**
- * Called upon first glVertex, glColor, glTexCoord, etc.
- */
-void vbo_exec_BeginVertices( struct gl_context *ctx )
-{
- struct vbo_exec_context *exec = &vbo_context(ctx)->exec;
-
- vbo_exec_vtx_map( exec );
-
- assert((ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT) == 0);
- assert(exec->begin_vertices_flags);
-
- ctx->Driver.NeedFlush |= exec->begin_vertices_flags;
-}
-
-
-/**
* If inside glBegin()/glEnd(), it should assert(0). Otherwise, if
* FLUSH_STORED_VERTICES bit in \p flags is set flushes any buffered
* vertices, if FLUSH_UPDATE_CURRENT bit is set updates
@@ -1197,7 +1214,7 @@ void vbo_exec_FlushVertices( struct gl_context *ctx, GLuint flags )
/* Flush (draw), and make sure VBO is left unmapped when done */
vbo_exec_FlushVertices_internal(exec, GL_TRUE);
- /* Need to do this to ensure vbo_exec_BeginVertices gets called again:
+ /* Need to do this to ensure vbo_exec_begin_vertices gets called again:
*/
ctx->Driver.NeedFlush &= ~(FLUSH_UPDATE_CURRENT | flags);
diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
index 2bfb0c32b73..174cbc37c26 100644
--- a/src/mesa/vbo/vbo_exec_draw.c
+++ b/src/mesa/vbo/vbo_exec_draw.c
@@ -53,10 +53,10 @@ vbo_exec_debug_verts( struct vbo_exec_context *exec )
for (i = 0 ; i < exec->vtx.prim_count ; i++) {
struct _mesa_prim *prim = &exec->vtx.prim[i];
printf(" prim %d: %s%s %d..%d %s %s\n",
- i,
+ i,
_mesa_lookup_prim_by_nr(prim->mode),
prim->weak ? " (weak)" : "",
- prim->start,
+ prim->start,
prim->start + prim->count,
prim->begin ? "BEGIN" : "(wrap)",
prim->end ? "END" : "(wrap)");
@@ -79,7 +79,6 @@ vbo_copy_vertices( struct vbo_exec_context *exec )
exec->vtx.prim[exec->vtx.prim_count-1].start *
exec->vtx.vertex_size);
-
switch (exec->ctx->Driver.CurrentExecPrimitive) {
case GL_POINTS:
return 0;
@@ -219,7 +218,7 @@ vbo_exec_bind_arrays( struct gl_context *ctx )
exec->vtx.inputs[attr] = &arrays[attr];
if (_mesa_is_bufferobj(exec->vtx.bufferobj)) {
- /* a real buffer obj: Ptr is an offset, not a pointer*/
+ /* a real buffer obj: Ptr is an offset, not a pointer */
assert(exec->vtx.bufferobj->Mappings[MAP_INTERNAL].Pointer);
assert(offset >= 0);
arrays[attr].Ptr = (GLubyte *)
@@ -259,7 +258,7 @@ vbo_exec_vtx_unmap( struct vbo_exec_context *exec )
{
if (_mesa_is_bufferobj(exec->vtx.bufferobj)) {
struct gl_context *ctx = exec->ctx;
-
+
if (ctx->Driver.FlushMappedBufferRange) {
GLintptr offset = exec->vtx.buffer_used -
exec->vtx.bufferobj->Mappings[MAP_INTERNAL].Offset;
@@ -277,7 +276,7 @@ vbo_exec_vtx_unmap( struct vbo_exec_context *exec )
assert(exec->vtx.buffer_used <= VBO_VERT_BUFFER_SIZE);
assert(exec->vtx.buffer_ptr != NULL);
-
+
ctx->Driver.UnmapBuffer(ctx, exec->vtx.bufferobj, MAP_INTERNAL);
exec->vtx.buffer_map = NULL;
exec->vtx.buffer_ptr = NULL;
@@ -299,7 +298,7 @@ vbo_exec_vtx_map( struct vbo_exec_context *exec )
GL_MAP_FLUSH_EXPLICIT_BIT |
MESA_MAP_NOWAIT_BIT;
const GLenum usage = GL_STREAM_DRAW_ARB;
-
+
if (!_mesa_is_bufferobj(exec->vtx.bufferobj))
return;
@@ -323,7 +322,7 @@ vbo_exec_vtx_map( struct vbo_exec_context *exec )
exec->vtx.buffer_ptr = exec->vtx.buffer_map = NULL;
}
}
-
+
if (!exec->vtx.buffer_map) {
/* Need to allocate a new VBO */
exec->vtx.buffer_used = 0;
@@ -381,14 +380,14 @@ vbo_exec_vtx_flush(struct vbo_exec_context *exec, GLboolean keepUnmapped)
if (0)
vbo_exec_debug_verts( exec );
- if (exec->vtx.prim_count &&
+ if (exec->vtx.prim_count &&
exec->vtx.vert_count) {
- exec->vtx.copied.nr = vbo_copy_vertices( exec );
+ exec->vtx.copied.nr = vbo_copy_vertices( exec );
if (exec->vtx.copied.nr != exec->vtx.vert_count) {
struct gl_context *ctx = exec->ctx;
-
+
/* Before the update_state() as this may raise _NEW_VARYING_VP_INPUTS
* from _mesa_set_varying_vp_inputs().
*/
@@ -405,7 +404,7 @@ vbo_exec_vtx_flush(struct vbo_exec_context *exec, GLboolean keepUnmapped)
printf("%s %d %d\n", __func__, exec->vtx.prim_count,
exec->vtx.vert_count);
- vbo_context(ctx)->draw_prims( ctx,
+ vbo_context(ctx)->draw_prims( ctx,
exec->vtx.prim,
exec->vtx.prim_count,
NULL,
@@ -433,7 +432,7 @@ vbo_exec_vtx_flush(struct vbo_exec_context *exec, GLboolean keepUnmapped)
if (keepUnmapped || exec->vtx.vertex_size == 0)
exec->vtx.max_vert = 0;
else
- exec->vtx.max_vert = ((VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) /
+ exec->vtx.max_vert = ((VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) /
(exec->vtx.vertex_size * sizeof(GLfloat)));
exec->vtx.buffer_ptr = exec->vtx.buffer_map;
diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c
index 1a70d168c55..fdc677f9a07 100644
--- a/src/mesa/vbo/vbo_save_api.c
+++ b/src/mesa/vbo/vbo_save_api.c
@@ -648,7 +648,8 @@ _save_upgrade_vertex(struct gl_context *ctx, GLuint attr, GLuint newsz)
/* Recalculate all the attrptr[] values:
*/
- for (i = 0, tmp = save->vertex; i < VBO_ATTRIB_MAX; i++) {
+ tmp = save->vertex;
+ for (i = 0; i < VBO_ATTRIB_MAX; i++) {
if (save->attrsz[i]) {
save->attrptr[i] = tmp;
tmp += save->attrsz[i];
@@ -1543,7 +1544,7 @@ vbo_print_vertex_list(struct gl_context *ctx, void *data, FILE *f)
node->vertex_store->bufferobj : NULL;
(void) ctx;
- fprintf(f, "VBO-VERTEX-LIST, %u vertices %d primitives, %d vertsize "
+ fprintf(f, "VBO-VERTEX-LIST, %u vertices, %d primitives, %d vertsize, "
"buffer %p\n",
node->count, node->prim_count, node->vertex_size,
buffer);
diff --git a/src/util/Makefile.sources b/src/util/Makefile.sources
index e45431d1de8..a87114601c8 100644
--- a/src/util/Makefile.sources
+++ b/src/util/Makefile.sources
@@ -3,6 +3,8 @@ MESA_UTIL_FILES := \
debug.c \
debug.h \
format_srgb.h \
+ half_float.c \
+ half_float.h \
hash_table.c \
hash_table.h \
list.h \
diff --git a/src/util/half_float.c b/src/util/half_float.c
new file mode 100644
index 00000000000..4df64c2ccf9
--- /dev/null
+++ b/src/util/half_float.c
@@ -0,0 +1,177 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <math.h>
+#include <assert.h>
+#include "half_float.h"
+#include "rounding.h"
+
+typedef union { float f; int32_t i; uint32_t u; } fi_type;
+
+/**
+ * Convert a 4-byte float to a 2-byte half float.
+ *
+ * Not all float32 values can be represented exactly as a float16 value. We
+ * round such intermediate float32 values to the nearest float16. When the
+ * float32 lies exactly between to float16 values, we round to the one with
+ * an even mantissa.
+ *
+ * This rounding behavior has several benefits:
+ * - It has no sign bias.
+ *
+ * - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's
+ * GPU ISA.
+ *
+ * - By reproducing the behavior of the GPU (at least on Intel hardware),
+ * compile-time evaluation of constant packHalf2x16 GLSL expressions will
+ * result in the same value as if the expression were executed on the GPU.
+ */
+uint16_t
+_mesa_float_to_half(float val)
+{
+ const fi_type fi = {val};
+ const int flt_m = fi.i & 0x7fffff;
+ const int flt_e = (fi.i >> 23) & 0xff;
+ const int flt_s = (fi.i >> 31) & 0x1;
+ int s, e, m = 0;
+ uint16_t result;
+
+ /* sign bit */
+ s = flt_s;
+
+ /* handle special cases */
+ if ((flt_e == 0) && (flt_m == 0)) {
+ /* zero */
+ /* m = 0; - already set */
+ e = 0;
+ }
+ else if ((flt_e == 0) && (flt_m != 0)) {
+ /* denorm -- denorm float maps to 0 half */
+ /* m = 0; - already set */
+ e = 0;
+ }
+ else if ((flt_e == 0xff) && (flt_m == 0)) {
+ /* infinity */
+ /* m = 0; - already set */
+ e = 31;
+ }
+ else if ((flt_e == 0xff) && (flt_m != 0)) {
+ /* NaN */
+ m = 1;
+ e = 31;
+ }
+ else {
+ /* regular number */
+ const int new_exp = flt_e - 127;
+ if (new_exp < -14) {
+ /* The float32 lies in the range (0.0, min_normal16) and is rounded
+ * to a nearby float16 value. The result will be either zero, subnormal,
+ * or normal.
+ */
+ e = 0;
+ m = _mesa_lroundevenf((1 << 24) * fabsf(fi.f));
+ }
+ else if (new_exp > 15) {
+ /* map this value to infinity */
+ /* m = 0; - already set */
+ e = 31;
+ }
+ else {
+ /* The float32 lies in the range
+ * [min_normal16, max_normal16 + max_step16)
+ * and is rounded to a nearby float16 value. The result will be
+ * either normal or infinite.
+ */
+ e = new_exp + 15;
+ m = _mesa_lroundevenf(flt_m / (float) (1 << 13));
+ }
+ }
+
+ assert(0 <= m && m <= 1024);
+ if (m == 1024) {
+ /* The float32 was rounded upwards into the range of the next exponent,
+ * so bump the exponent. This correctly handles the case where f32
+ * should be rounded up to float16 infinity.
+ */
+ ++e;
+ m = 0;
+ }
+
+ result = (s << 15) | (e << 10) | m;
+ return result;
+}
+
+
+/**
+ * Convert a 2-byte half float to a 4-byte float.
+ * Based on code from:
+ * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html
+ */
+float
+_mesa_half_to_float(uint16_t val)
+{
+ /* XXX could also use a 64K-entry lookup table */
+ const int m = val & 0x3ff;
+ const int e = (val >> 10) & 0x1f;
+ const int s = (val >> 15) & 0x1;
+ int flt_m, flt_e, flt_s;
+ fi_type fi;
+ float result;
+
+ /* sign bit */
+ flt_s = s;
+
+ /* handle special cases */
+ if ((e == 0) && (m == 0)) {
+ /* zero */
+ flt_m = 0;
+ flt_e = 0;
+ }
+ else if ((e == 0) && (m != 0)) {
+ /* denorm -- denorm half will fit in non-denorm single */
+ const float half_denorm = 1.0f / 16384.0f; /* 2^-14 */
+ float mantissa = ((float) (m)) / 1024.0f;
+ float sign = s ? -1.0f : 1.0f;
+ return sign * mantissa * half_denorm;
+ }
+ else if ((e == 31) && (m == 0)) {
+ /* infinity */
+ flt_e = 0xff;
+ flt_m = 0;
+ }
+ else if ((e == 31) && (m != 0)) {
+ /* NaN */
+ flt_e = 0xff;
+ flt_m = 1;
+ }
+ else {
+ /* regular */
+ flt_e = e + 112;
+ flt_m = m << 13;
+ }
+
+ fi.i = (flt_s << 31) | (flt_e << 23) | flt_m;
+ result = fi.f;
+ return result;
+}
diff --git a/src/util/half_float.h b/src/util/half_float.h
new file mode 100644
index 00000000000..64f20421018
--- /dev/null
+++ b/src/util/half_float.h
@@ -0,0 +1,41 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _HALF_FLOAT_H_
+#define _HALF_FLOAT_H_
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+uint16_t _mesa_float_to_half(float val);
+float _mesa_half_to_float(uint16_t val);
+
+#ifdef __cplusplus
+} /* extern C */
+#endif
+
+#endif /* _HALF_FLOAT_H_ */
diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am
index 985864a87fe..5abbd379b54 100644
--- a/src/vulkan/Makefile.am
+++ b/src/vulkan/Makefile.am
@@ -42,6 +42,7 @@ AM_CPPFLAGS = \
$(DEFINES) \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src \
+ -I$(top_srcdir)/src/glsl/nir \
-I$(top_srcdir)/src/mapi \
-I$(top_srcdir)/src/mesa \
-I$(top_srcdir)/src/mesa/drivers/dri/common \
diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp
index a3b8d1cc80c..2b8e7cee9aa 100644
--- a/src/vulkan/anv_compiler.cpp
+++ b/src/vulkan/anv_compiler.cpp
@@ -36,6 +36,7 @@
#include <brw_gs.h>
#include <brw_cs.h>
#include "brw_vec4_gs_visitor.h"
+#include <brw_compiler.h>
#include <mesa/main/shaderobj.h>
#include <mesa/main/fbobject.h>
@@ -307,8 +308,9 @@ really_do_vs_prog(struct brw_context *brw,
/* Emit GEN4 code.
*/
- program = brw_vs_emit(brw, mem_ctx, key, prog_data, &vp->program,
- prog, -1, &program_size);
+ program = brw_compile_vs(brw->intelScreen->compiler, brw, mem_ctx,
+ key, prog_data, vs->Program->nir, NULL, false, -1,
+ &program_size, NULL);
if (program == NULL) {
ralloc_free(mem_ctx);
return false;
@@ -562,8 +564,9 @@ really_do_wm_prog(struct brw_context *brw,
*/
prog_data->binding_table.render_target_start = 0;
- program = brw_wm_fs_emit(brw, mem_ctx, key, prog_data,
- &fp->program, prog, -1, -1, &program_size);
+ program = brw_compile_fs(brw->intelScreen->compiler, brw, mem_ctx, key,
+ prog_data, fp->program.Base.nir, fs->Program,
+ -1, -1, brw->use_rep_send, &program_size, NULL);
if (program == NULL) {
ralloc_free(mem_ctx);
return false;
@@ -831,7 +834,8 @@ anv_codegen_gs_prog(struct brw_context *brw,
void *mem_ctx = ralloc_context(NULL);
unsigned program_size;
const unsigned *program =
- brw_gs_emit(brw, prog, &c, mem_ctx, -1, &program_size);
+ brw_compile_gs(brw->intelScreen->compiler, brw, &c, gp->program.Base.nir,
+ prog, mem_ctx, -1, &program_size, NULL);
if (program == NULL) {
ralloc_free(mem_ctx);
return false;
@@ -867,8 +871,9 @@ brw_codegen_cs_prog(struct brw_context *brw,
anv_nir_apply_dynamic_offsets(pipeline, cs->Program->nir, &prog_data->base);
anv_nir_apply_pipeline_layout(cs->Program->nir, pipeline->layout);
- program = brw_cs_emit(brw, mem_ctx, key, prog_data,
- &cp->program, prog, -1, &program_size);
+ program = brw_compile_cs(brw->intelScreen->compiler, brw, mem_ctx, key,
+ prog_data, cs->Program->nir, -1,
+ &program_size, NULL);
if (program == NULL) {
ralloc_free(mem_ctx);
return false;
@@ -1142,10 +1147,13 @@ setup_nir_io(struct gl_shader *mesa_shader,
prog->OutputsWritten |= BITFIELD64_BIT(var->data.location);
}
+ shader->info.system_values_read = 0;
+ foreach_list_typed(nir_variable, var, node, &shader->system_values) {
+ shader->info.system_values_read |= BITFIELD64_BIT(var->data.location);
+ }
+
shader->info.inputs_read = prog->InputsRead;
shader->info.outputs_written = prog->OutputsWritten;
-
- mesa_shader->num_uniform_components = shader->num_uniforms;
}
static void
@@ -1163,7 +1171,7 @@ anv_compile_shader_spirv(struct anv_compiler *compiler,
"failed to create %s shader\n", stage_info[stage].name);
#define CREATE_PROGRAM(stage) \
- _mesa_init_##stage##_program(&brw->ctx, &ralloc(mesa_shader, struct brw_##stage##_program)->program, 0, 0)
+ &ralloc(mesa_shader, struct brw_##stage##_program)->program.Base
bool is_scalar;
struct gl_program *prog;
@@ -1187,6 +1195,7 @@ anv_compile_shader_spirv(struct anv_compiler *compiler,
default:
unreachable("Unsupported shader stage");
}
+ _mesa_init_gl_program(prog, 0, 0);
_mesa_reference_program(&brw->ctx, &mesa_shader->Program, prog);
mesa_shader->Program->Parameters =
@@ -1215,11 +1224,14 @@ anv_compile_shader_spirv(struct anv_compiler *compiler,
}
nir_validate_shader(mesa_shader->Program->nir);
+ setup_nir_io(mesa_shader, mesa_shader->Program->nir);
+
brw_process_nir(mesa_shader->Program->nir,
compiler->screen->devinfo,
NULL, mesa_shader->Stage, is_scalar);
- setup_nir_io(mesa_shader, mesa_shader->Program->nir);
+ mesa_shader->num_uniform_components =
+ mesa_shader->Program->nir->num_uniforms;
fail_if(mesa_shader->Program->nir == NULL,
"failed to translate SPIR-V to NIR\n");
diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c
index 8f6bc421194..76b8c4173e6 100644
--- a/src/vulkan/anv_meta.c
+++ b/src/vulkan/anv_meta.c
@@ -39,13 +39,11 @@ build_nir_vertex_shader(bool attr_flat)
nir_builder_init_simple_shader(&b, MESA_SHADER_VERTEX);
- nir_variable *pos_in = nir_variable_create(b.shader, "a_pos",
- vertex_type,
- nir_var_shader_in);
+ nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
+ vertex_type, "a_pos");
pos_in->data.location = VERT_ATTRIB_GENERIC0;
- nir_variable *pos_out = nir_variable_create(b.shader, "gl_Position",
- vertex_type,
- nir_var_shader_out);
+ nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
+ vertex_type, "gl_Position");
pos_in->data.location = VARYING_SLOT_POS;
nir_copy_var(&b, pos_out, pos_in);
@@ -53,11 +51,11 @@ build_nir_vertex_shader(bool attr_flat)
* to store the color and for blit shaders it's the texture coordinate.
*/
const struct glsl_type *attr_type = glsl_vec4_type();
- nir_variable *attr_in = nir_variable_create(b.shader, "a_attr", attr_type,
- nir_var_shader_in);
+ nir_variable *attr_in = nir_variable_create(b.shader, nir_var_shader_in,
+ attr_type, "a_attr");
attr_in->data.location = VERT_ATTRIB_GENERIC1;
- nir_variable *attr_out = nir_variable_create(b.shader, "v_attr", attr_type,
- nir_var_shader_out);
+ nir_variable *attr_out = nir_variable_create(b.shader, nir_var_shader_out,
+ attr_type, "v_attr");
attr_out->data.location = VARYING_SLOT_VAR0;
attr_out->data.interpolation = attr_flat ? INTERP_QUALIFIER_FLAT :
INTERP_QUALIFIER_SMOOTH;
@@ -75,14 +73,12 @@ build_nir_clear_fragment_shader(void)
nir_builder_init_simple_shader(&b, MESA_SHADER_FRAGMENT);
- nir_variable *color_in = nir_variable_create(b.shader, "v_attr",
- color_type,
- nir_var_shader_in);
+ nir_variable *color_in = nir_variable_create(b.shader, nir_var_shader_in,
+ color_type, "v_attr");
color_in->data.location = VARYING_SLOT_VAR0;
color_in->data.interpolation = INTERP_QUALIFIER_FLAT;
- nir_variable *color_out = nir_variable_create(b.shader, "f_color",
- color_type,
- nir_var_shader_out);
+ nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
+ color_type, "f_color");
color_out->data.location = FRAG_RESULT_DATA0;
nir_copy_var(&b, color_out, color_in);
@@ -98,15 +94,14 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim)
const struct glsl_type *color_type = glsl_vec4_type();
- nir_variable *tex_pos_in = nir_variable_create(b.shader, "v_attr",
- glsl_vec4_type(),
- nir_var_shader_in);
+ nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
+ glsl_vec4_type(), "v_attr");
tex_pos_in->data.location = VARYING_SLOT_VAR0;
const struct glsl_type *sampler_type =
glsl_sampler_type(tex_dim, false, false, glsl_get_base_type(color_type));
- nir_variable *sampler = nir_variable_create(b.shader, "s_tex", sampler_type,
- nir_var_uniform);
+ nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform,
+ sampler_type, "s_tex");
sampler->data.descriptor_set = 0;
sampler->data.binding = 0;
@@ -133,9 +128,8 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim)
nir_ssa_dest_init(&tex->instr, &tex->dest, 4, "tex");
nir_builder_instr_insert(&b, &tex->instr);
- nir_variable *color_out = nir_variable_create(b.shader, "f_color",
- color_type,
- nir_var_shader_out);
+ nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
+ color_type, "f_color");
color_out->data.location = FRAG_RESULT_DATA0;
nir_store_var(&b, color_out, &tex->dest.ssa);
diff --git a/src/vulkan/anv_nir_builder.h b/src/vulkan/anv_nir_builder.h
index 299c8c1aad0..f26cb046a6b 100644
--- a/src/vulkan/anv_nir_builder.h
+++ b/src/vulkan/anv_nir_builder.h
@@ -54,49 +54,3 @@ nir_copy_var(nir_builder *build, nir_variable *dest, nir_variable *src)
copy->variables[1] = nir_deref_var_create(copy, src);
nir_builder_instr_insert(build, &copy->instr);
}
-
-static inline nir_variable *
-nir_variable_create(nir_shader *shader, const char *name,
- const struct glsl_type *type, nir_variable_mode mode)
-{
- nir_variable *var = rzalloc(shader, nir_variable);
- var->name = ralloc_strdup(var, name);
- var->type = type;
- var->data.mode = mode;
-
- if ((mode == nir_var_shader_in && shader->stage != MESA_SHADER_VERTEX) ||
- (mode == nir_var_shader_out && shader->stage != MESA_SHADER_FRAGMENT))
- var->data.interpolation = INTERP_QUALIFIER_SMOOTH;
-
- switch (var->data.mode) {
- case nir_var_local:
- assert(!"nir_variable_create cannot be used for local variables");
- break;
-
- case nir_var_global:
- exec_list_push_tail(&shader->globals, &var->node);
- break;
-
- case nir_var_shader_in:
- exec_list_push_tail(&shader->inputs, &var->node);
- break;
-
- case nir_var_shader_out:
- exec_list_push_tail(&shader->outputs, &var->node);
- break;
-
- case nir_var_uniform:
- case nir_var_shader_storage:
- exec_list_push_tail(&shader->uniforms, &var->node);
- break;
-
- case nir_var_system_value:
- exec_list_push_tail(&shader->system_values, &var->node);
- break;
-
- default:
- unreachable("not reached");
- }
-
- return var;
-}