Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--  src/gallium/drivers/freedreno/Android.mk | 5
-rw-r--r--  src/gallium/drivers/freedreno/Makefile.am | 2
-rw-r--r--  src/gallium/drivers/freedreno/a2xx/a2xx.xml.h | 18
-rw-r--r--  src/gallium/drivers/freedreno/a2xx/fd2_blend.h | 2
-rw-r--r--  src/gallium/drivers/freedreno/a2xx/fd2_context.c | 4
-rw-r--r--  src/gallium/drivers/freedreno/a2xx/fd2_context.h | 2
-rw-r--r--  src/gallium/drivers/freedreno/a2xx/fd2_rasterizer.h | 2
-rw-r--r--  src/gallium/drivers/freedreno/a2xx/fd2_texture.h | 4
-rw-r--r--  src/gallium/drivers/freedreno/a2xx/fd2_zsa.h | 2
-rw-r--r--  src/gallium/drivers/freedreno/a3xx/a3xx.xml.h | 14
-rw-r--r--  src/gallium/drivers/freedreno/a3xx/fd3_blend.h | 6
-rw-r--r--  src/gallium/drivers/freedreno/a3xx/fd3_context.c | 3
-rw-r--r--  src/gallium/drivers/freedreno/a3xx/fd3_context.h | 2
-rw-r--r--  src/gallium/drivers/freedreno/a3xx/fd3_draw.c | 16
-rw-r--r--  src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 184
-rw-r--r--  src/gallium/drivers/freedreno/a3xx/fd3_emit.h | 6
-rw-r--r--  src/gallium/drivers/freedreno/a3xx/fd3_format.h | 23
-rw-r--r--  src/gallium/drivers/freedreno/a3xx/fd3_gmem.c | 9
-rw-r--r--  src/gallium/drivers/freedreno/a3xx/fd3_program.c | 12
-rw-r--r--  src/gallium/drivers/freedreno/a3xx/fd3_query.c | 2
-rw-r--r--  src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.h | 2
-rw-r--r--  src/gallium/drivers/freedreno/a3xx/fd3_screen.c | 2
-rw-r--r--  src/gallium/drivers/freedreno/a3xx/fd3_texture.c | 4
-rw-r--r--  src/gallium/drivers/freedreno/a3xx/fd3_texture.h | 4
-rw-r--r--  src/gallium/drivers/freedreno/a3xx/fd3_zsa.h | 2
-rw-r--r--  src/gallium/drivers/freedreno/a4xx/a4xx.xml.h | 263
-rw-r--r--  src/gallium/drivers/freedreno/a4xx/fd4_blend.c | 18
-rw-r--r--  src/gallium/drivers/freedreno/a4xx/fd4_blend.h | 6
-rw-r--r--  src/gallium/drivers/freedreno/a4xx/fd4_context.c | 3
-rw-r--r--  src/gallium/drivers/freedreno/a4xx/fd4_context.h | 2
-rw-r--r--  src/gallium/drivers/freedreno/a4xx/fd4_draw.c | 46
-rw-r--r--  src/gallium/drivers/freedreno/a4xx/fd4_draw.h | 8
-rw-r--r--  src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 333
-rw-r--r--  src/gallium/drivers/freedreno/a4xx/fd4_emit.h | 14
-rw-r--r--  src/gallium/drivers/freedreno/a4xx/fd4_format.c | 27
-rw-r--r--  src/gallium/drivers/freedreno/a4xx/fd4_gmem.c | 205
-rw-r--r--  src/gallium/drivers/freedreno/a4xx/fd4_program.c | 127
-rw-r--r--  src/gallium/drivers/freedreno/a4xx/fd4_program.h | 3
-rw-r--r--  src/gallium/drivers/freedreno/a4xx/fd4_query.c | 86
-rw-r--r--  src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c | 8
-rw-r--r--  src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h | 2
-rw-r--r--  src/gallium/drivers/freedreno/a4xx/fd4_screen.c | 2
-rw-r--r--  src/gallium/drivers/freedreno/a4xx/fd4_texture.c | 4
-rw-r--r--  src/gallium/drivers/freedreno/a4xx/fd4_texture.h | 4
-rw-r--r--  src/gallium/drivers/freedreno/a4xx/fd4_zsa.h | 2
-rw-r--r--  src/gallium/drivers/freedreno/adreno_common.xml.h | 18
-rw-r--r--  src/gallium/drivers/freedreno/adreno_pm4.xml.h | 18
-rw-r--r--  src/gallium/drivers/freedreno/freedreno_context.c | 21
-rw-r--r--  src/gallium/drivers/freedreno/freedreno_context.h | 35
-rw-r--r--  src/gallium/drivers/freedreno/freedreno_draw.c | 76
-rw-r--r--  src/gallium/drivers/freedreno/freedreno_fence.c | 3
-rw-r--r--  src/gallium/drivers/freedreno/freedreno_gmem.c | 15
-rw-r--r--  src/gallium/drivers/freedreno/freedreno_gmem.h | 6
-rw-r--r--  src/gallium/drivers/freedreno/freedreno_program.c | 6
-rw-r--r--  src/gallium/drivers/freedreno/freedreno_resource.c | 24
-rw-r--r--  src/gallium/drivers/freedreno/freedreno_resource.h | 25
-rw-r--r--  src/gallium/drivers/freedreno/freedreno_screen.c | 45
-rw-r--r--  src/gallium/drivers/freedreno/freedreno_screen.h | 10
-rw-r--r--  src/gallium/drivers/freedreno/freedreno_state.c | 65
-rw-r--r--  src/gallium/drivers/freedreno/freedreno_surface.c | 3
-rw-r--r--  src/gallium/drivers/freedreno/freedreno_surface.h | 2
-rw-r--r--  src/gallium/drivers/freedreno/freedreno_util.h | 60
-rw-r--r--  src/gallium/drivers/freedreno/ir3/disasm-a3xx.c | 223
-rw-r--r--  src/gallium/drivers/freedreno/ir3/instr-a3xx.h | 87
-rw-r--r--  src/gallium/drivers/freedreno/ir3/ir3.c | 70
-rw-r--r--  src/gallium/drivers/freedreno/ir3/ir3.h | 38
-rw-r--r--  src/gallium/drivers/freedreno/ir3/ir3_cmdline.c | 159
-rw-r--r--  src/gallium/drivers/freedreno/ir3/ir3_compiler.h | 5
-rw-r--r--  src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 251
-rw-r--r--  src/gallium/drivers/freedreno/ir3/ir3_cp.c | 10
-rw-r--r--  src/gallium/drivers/freedreno/ir3/ir3_depth.c | 12
-rw-r--r--  src/gallium/drivers/freedreno/ir3/ir3_group.c | 5
-rw-r--r--  src/gallium/drivers/freedreno/ir3/ir3_legalize.c | 4
-rw-r--r--  src/gallium/drivers/freedreno/ir3/ir3_print.c | 14
-rw-r--r--  src/gallium/drivers/freedreno/ir3/ir3_ra.c | 185
-rw-r--r--  src/gallium/drivers/freedreno/ir3/ir3_sched.c | 88
-rw-r--r--  src/gallium/drivers/freedreno/ir3/ir3_shader.c | 410
-rw-r--r--  src/gallium/drivers/freedreno/ir3/ir3_shader.h | 46
-rw-r--r--  src/gallium/drivers/i915/i915_batchbuffer.h | 16
-rw-r--r--  src/gallium/drivers/i915/i915_context.h | 4
-rw-r--r--  src/gallium/drivers/i915/i915_debug.h | 6
-rw-r--r--  src/gallium/drivers/i915/i915_fpc.h | 2
-rw-r--r--  src/gallium/drivers/i915/i915_fpc_translate.c | 2
-rw-r--r--  src/gallium/drivers/i915/i915_prim_emit.c | 6
-rw-r--r--  src/gallium/drivers/i915/i915_prim_vbuf.c | 2
-rw-r--r--  src/gallium/drivers/i915/i915_resource.h | 4
-rw-r--r--  src/gallium/drivers/i915/i915_resource_texture.c | 8
-rw-r--r--  src/gallium/drivers/i915/i915_screen.c | 17
-rw-r--r--  src/gallium/drivers/i915/i915_screen.h | 2
-rw-r--r--  src/gallium/drivers/i915/i915_state_dynamic.c | 4
-rw-r--r--  src/gallium/drivers/i915/i915_state_immediate.c | 2
-rw-r--r--  src/gallium/drivers/i915/i915_state_inlines.h | 14
-rw-r--r--  src/gallium/drivers/ilo/Makefile.am | 2
-rw-r--r--  src/gallium/drivers/ilo/Makefile.sources | 6
-rw-r--r--  src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h | 43
-rw-r--r--  src/gallium/drivers/ilo/core/ilo_builder_3d_top.h | 65
-rw-r--r--  src/gallium/drivers/ilo/core/ilo_core.h | 6
-rw-r--r--  src/gallium/drivers/ilo/core/ilo_image.c | 1779
-rw-r--r--  src/gallium/drivers/ilo/core/ilo_image.h | 84
-rw-r--r--  src/gallium/drivers/ilo/core/ilo_state_sol.c | 45
-rw-r--r--  src/gallium/drivers/ilo/core/ilo_state_sol.h | 26
-rw-r--r--  src/gallium/drivers/ilo/core/ilo_state_surface.c | 361
-rw-r--r--  src/gallium/drivers/ilo/core/ilo_state_surface.h | 41
-rw-r--r--  src/gallium/drivers/ilo/core/ilo_state_vf.c | 52
-rw-r--r--  src/gallium/drivers/ilo/core/ilo_state_vf.h | 24
-rw-r--r--  src/gallium/drivers/ilo/core/ilo_state_zs.c | 226
-rw-r--r--  src/gallium/drivers/ilo/core/ilo_state_zs.h | 38
-rw-r--r--  src/gallium/drivers/ilo/core/ilo_vma.h (renamed from src/gallium/drivers/ilo/core/ilo_buffer.h) | 57
-rw-r--r--  src/gallium/drivers/ilo/ilo_blitter_blt.c | 43
-rw-r--r--  src/gallium/drivers/ilo/ilo_blitter_rectlist.c | 4
-rw-r--r--  src/gallium/drivers/ilo/ilo_common.h | 8
-rw-r--r--  src/gallium/drivers/ilo/ilo_context.c | 2
-rw-r--r--  src/gallium/drivers/ilo/ilo_draw.c | 11
-rw-r--r--  src/gallium/drivers/ilo/ilo_format.h | 35
-rw-r--r--  src/gallium/drivers/ilo/ilo_render_surface.c | 27
-rw-r--r--  src/gallium/drivers/ilo/ilo_resource.c | 309
-rw-r--r--  src/gallium/drivers/ilo/ilo_resource.h | 25
-rw-r--r--  src/gallium/drivers/ilo/ilo_screen.c | 20
-rw-r--r--  src/gallium/drivers/ilo/ilo_state.c | 134
-rw-r--r--  src/gallium/drivers/ilo/ilo_state.h | 2
-rw-r--r--  src/gallium/drivers/ilo/ilo_transfer.c | 71
-rw-r--r--  src/gallium/drivers/llvmpipe/lp_bld_blend.c | 2
-rw-r--r--  src/gallium/drivers/llvmpipe/lp_context.h | 2
-rw-r--r--  src/gallium/drivers/llvmpipe/lp_debug.h | 2
-rw-r--r--  src/gallium/drivers/llvmpipe/lp_fence.h | 4
-rw-r--r--  src/gallium/drivers/llvmpipe/lp_rast.h | 18
-rw-r--r--  src/gallium/drivers/llvmpipe/lp_rast_priv.h | 6
-rw-r--r--  src/gallium/drivers/llvmpipe/lp_rast_tri.c | 10
-rw-r--r--  src/gallium/drivers/llvmpipe/lp_scene.h | 16
-rw-r--r--  src/gallium/drivers/llvmpipe/lp_screen.c | 20
-rw-r--r--  src/gallium/drivers/llvmpipe/lp_screen.h | 2
-rw-r--r--  src/gallium/drivers/llvmpipe/lp_setup.h | 2
-rw-r--r--  src/gallium/drivers/llvmpipe/lp_setup_line.c | 6
-rw-r--r--  src/gallium/drivers/llvmpipe/lp_setup_point.c | 2
-rw-r--r--  src/gallium/drivers/llvmpipe/lp_setup_tri.c | 12
-rw-r--r--  src/gallium/drivers/llvmpipe/lp_setup_vbuf.c | 2
-rw-r--r--  src/gallium/drivers/llvmpipe/lp_state_fs.c | 14
-rw-r--r--  src/gallium/drivers/llvmpipe/lp_test.h | 2
-rw-r--r--  src/gallium/drivers/llvmpipe/lp_texture.h | 14
-rw-r--r--  src/gallium/drivers/nouveau/Makefile.am | 2
-rw-r--r--  src/gallium/drivers/nouveau/codegen/nv50_ir.cpp | 4
-rw-r--r--  src/gallium/drivers/nouveau/codegen/nv50_ir.h | 4
-rw-r--r--  src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp | 3
-rw-r--r--  src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp | 7
-rw-r--r--  src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h | 26
-rw-r--r--  src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 33
-rw-r--r--  src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 25
-rw-r--r--  src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp | 12
-rw-r--r--  src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 18
-rw-r--r--  src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 118
-rw-r--r--  src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp | 2
-rw-r--r--  src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp | 1
-rw-r--r--  src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 72
-rw-r--r--  src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 9
-rw-r--r--  src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp | 4
-rw-r--r--  src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 2
-rw-r--r--  src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp | 6
-rw-r--r--  src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp | 2
-rw-r--r--  src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp | 3
-rw-r--r--  src/gallium/drivers/nouveau/nouveau_buffer.c | 137
-rw-r--r--  src/gallium/drivers/nouveau/nouveau_buffer.h | 10
-rw-r--r--  src/gallium/drivers/nouveau/nouveau_context.h | 12
-rw-r--r--  src/gallium/drivers/nouveau/nouveau_fence.c | 36
-rw-r--r--  src/gallium/drivers/nouveau/nouveau_fence.h | 20
-rw-r--r--  src/gallium/drivers/nouveau/nouveau_gldefs.h | 14
-rw-r--r--  src/gallium/drivers/nouveau/nouveau_mm.c | 8
-rw-r--r--  src/gallium/drivers/nouveau/nouveau_screen.c | 20
-rw-r--r--  src/gallium/drivers/nouveau/nouveau_screen.h | 8
-rw-r--r--  src/gallium/drivers/nouveau/nouveau_statebuf.h | 2
-rw-r--r--  src/gallium/drivers/nouveau/nouveau_video.c | 66
-rw-r--r--  src/gallium/drivers/nouveau/nouveau_video.h | 12
-rw-r--r--  src/gallium/drivers/nouveau/nouveau_vp3_video.h | 12
-rw-r--r--  src/gallium/drivers/nouveau/nouveau_winsys.h | 16
-rw-r--r--  src/gallium/drivers/nouveau/nv30/nv30-40_3d.xml.h | 2
-rw-r--r--  src/gallium/drivers/nouveau/nv30/nv30_clear.c | 6
-rw-r--r--  src/gallium/drivers/nouveau/nv30/nv30_context.c | 10
-rw-r--r--  src/gallium/drivers/nouveau/nv30/nv30_context.h | 15
-rw-r--r--  src/gallium/drivers/nouveau/nv30/nv30_draw.c | 24
-rw-r--r--  src/gallium/drivers/nouveau/nv30/nv30_format.h | 8
-rw-r--r--  src/gallium/drivers/nouveau/nv30/nv30_fragprog.c | 39
-rw-r--r--  src/gallium/drivers/nouveau/nv30/nv30_miptree.c | 14
-rw-r--r--  src/gallium/drivers/nouveau/nv30/nv30_push.c | 12
-rw-r--r--  src/gallium/drivers/nouveau/nv30/nv30_query.c | 6
-rw-r--r--  src/gallium/drivers/nouveau/nv30/nv30_resource.c | 4
-rw-r--r--  src/gallium/drivers/nouveau/nv30/nv30_resource.h | 6
-rw-r--r--  src/gallium/drivers/nouveau/nv30/nv30_screen.c | 17
-rw-r--r--  src/gallium/drivers/nouveau/nv30/nv30_screen.h | 2
-rw-r--r--  src/gallium/drivers/nouveau/nv30/nv30_state.c | 8
-rw-r--r--  src/gallium/drivers/nouveau/nv30/nv30_state.h | 8
-rw-r--r--  src/gallium/drivers/nouveau/nv30/nv30_state_validate.c | 8
-rw-r--r--  src/gallium/drivers/nouveau/nv30/nv30_texture.c | 8
-rw-r--r--  src/gallium/drivers/nouveau/nv30/nv30_transfer.c | 62
-rw-r--r--  src/gallium/drivers/nouveau/nv30/nv30_vbo.c | 41
-rw-r--r--  src/gallium/drivers/nouveau/nv30/nv30_vertprog.c | 12
-rw-r--r--  src/gallium/drivers/nouveau/nv30/nv30_winsys.h | 22
-rw-r--r--  src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c | 56
-rw-r--r--  src/gallium/drivers/nouveau/nv30/nvfx_shader.h | 16
-rw-r--r--  src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c | 52
-rw-r--r--  src/gallium/drivers/nouveau/nv50/nv50_blit.h | 24
-rw-r--r--  src/gallium/drivers/nouveau/nv50/nv50_context.c | 14
-rw-r--r--  src/gallium/drivers/nouveau/nv50/nv50_context.h | 20
-rw-r--r--  src/gallium/drivers/nouveau/nv50/nv50_formats.c | 2
-rw-r--r--  src/gallium/drivers/nouveau/nv50/nv50_miptree.c | 28
-rw-r--r--  src/gallium/drivers/nouveau/nv50/nv50_program.c | 20
-rw-r--r--  src/gallium/drivers/nouveau/nv50/nv50_program.h | 6
-rw-r--r--  src/gallium/drivers/nouveau/nv50/nv50_push.c | 16
-rw-r--r--  src/gallium/drivers/nouveau/nv50/nv50_query.c | 71
-rw-r--r--  src/gallium/drivers/nouveau/nv50/nv50_resource.h | 12
-rw-r--r--  src/gallium/drivers/nouveau/nv50/nv50_screen.c | 22
-rw-r--r--  src/gallium/drivers/nouveau/nv50/nv50_screen.h | 32
-rw-r--r--  src/gallium/drivers/nouveau/nv50/nv50_shader_state.c | 22
-rw-r--r--  src/gallium/drivers/nouveau/nv50/nv50_state.c | 44
-rw-r--r--  src/gallium/drivers/nouveau/nv50/nv50_state_validate.c | 36
-rw-r--r--  src/gallium/drivers/nouveau/nv50/nv50_stateobj.h | 10
-rw-r--r--  src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h | 4
-rw-r--r--  src/gallium/drivers/nouveau/nv50/nv50_surface.c | 77
-rw-r--r--  src/gallium/drivers/nouveau/nv50/nv50_tex.c | 35
-rw-r--r--  src/gallium/drivers/nouveau/nv50/nv50_vbo.c | 43
-rw-r--r--  src/gallium/drivers/nouveau/nv50/nv50_winsys.h | 22
-rw-r--r--  src/gallium/drivers/nouveau/nv50/nv84_video.h | 4
-rw-r--r--  src/gallium/drivers/nouveau/nv50/nv84_video_vp.c | 2
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_compute.c | 24
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_compute.h | 2
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_context.c | 12
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 34
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c | 14
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 94
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_program.h | 6
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_query.c | 88
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 52
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_screen.h | 31
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c | 17
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 120
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c | 59
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h | 12
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_surface.c | 65
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_tex.c | 76
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c | 8
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c | 49
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c | 51
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h | 26
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nve4_compute.c | 28
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nve4_compute.h | 6
-rw-r--r--  src/gallium/drivers/r300/Makefile.am | 2
-rw-r--r--  src/gallium/drivers/r300/r300_blit.c | 2
-rw-r--r--  src/gallium/drivers/r300/r300_context.c | 8
-rw-r--r--  src/gallium/drivers/r300/r300_context.h | 20
-rw-r--r--  src/gallium/drivers/r300/r300_cs.h | 2
-rw-r--r--  src/gallium/drivers/r300/r300_fs.h | 4
-rw-r--r--  src/gallium/drivers/r300/r300_query.c | 7
-rw-r--r--  src/gallium/drivers/r300/r300_render.c | 4
-rw-r--r--  src/gallium/drivers/r300/r300_screen.c | 15
-rw-r--r--  src/gallium/drivers/r300/r300_screen.h | 8
-rw-r--r--  src/gallium/drivers/r300/r300_screen_buffer.c | 2
-rw-r--r--  src/gallium/drivers/r300/r300_screen_buffer.h | 2
-rw-r--r--  src/gallium/drivers/r300/r300_shader_semantics.h | 2
-rw-r--r--  src/gallium/drivers/r300/r300_state.c | 2
-rw-r--r--  src/gallium/drivers/r300/r300_state_inlines.h | 28
-rw-r--r--  src/gallium/drivers/r300/r300_texture.c | 2
-rw-r--r--  src/gallium/drivers/r300/r300_transfer.c | 8
-rw-r--r--  src/gallium/drivers/r600/Makefile.am | 2
-rw-r--r--  src/gallium/drivers/r600/eg_asm.c | 17
-rw-r--r--  src/gallium/drivers/r600/eg_sq.h | 7
-rw-r--r--  src/gallium/drivers/r600/evergreen_compute.c | 29
-rw-r--r--  src/gallium/drivers/r600/evergreen_state.c | 98
-rw-r--r--  src/gallium/drivers/r600/evergreend.h | 5
-rw-r--r--  src/gallium/drivers/r600/r600_blit.c | 18
-rw-r--r--  src/gallium/drivers/r600/r600_formats.h | 4
-rw-r--r--  src/gallium/drivers/r600/r600_hw_context.c | 75
-rw-r--r--  src/gallium/drivers/r600/r600_llvm.c | 191
-rw-r--r--  src/gallium/drivers/r600/r600_pipe.c | 13
-rw-r--r--  src/gallium/drivers/r600/r600_pipe.h | 128
-rw-r--r--  src/gallium/drivers/r600/r600_shader.c | 22
-rw-r--r--  src/gallium/drivers/r600/r600_shader.h | 1
-rw-r--r--  src/gallium/drivers/r600/r600_state.c | 36
-rw-r--r--  src/gallium/drivers/r600/r600_state_common.c | 115
-rw-r--r--  src/gallium/drivers/r600/sb/sb_sched.cpp | 2
-rw-r--r--  src/gallium/drivers/radeon/r600_buffer_common.c | 15
-rw-r--r--  src/gallium/drivers/radeon/r600_cs.h | 28
-rw-r--r--  src/gallium/drivers/radeon/r600_pipe_common.c | 134
-rw-r--r--  src/gallium/drivers/radeon/r600_pipe_common.h | 93
-rw-r--r--  src/gallium/drivers/radeon/r600_query.c | 137
-rw-r--r--  src/gallium/drivers/radeon/r600_streamout.c | 55
-rw-r--r--  src/gallium/drivers/radeon/r600_texture.c | 35
-rw-r--r--  src/gallium/drivers/radeon/r600d_common.h | 5
-rw-r--r--  src/gallium/drivers/radeon/radeon_elf_util.c | 13
-rw-r--r--  src/gallium/drivers/radeon/radeon_elf_util.h | 2
-rw-r--r--  src/gallium/drivers/radeon/radeon_llvm.h | 53
-rw-r--r--  src/gallium/drivers/radeon/radeon_llvm_emit.c | 13
-rw-r--r--  src/gallium/drivers/radeon/radeon_llvm_emit.h | 10
-rw-r--r--  src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 532
-rw-r--r--  src/gallium/drivers/radeon/radeon_uvd.c | 383
-rw-r--r--  src/gallium/drivers/radeon/radeon_uvd.h | 65
-rw-r--r--  src/gallium/drivers/radeon/radeon_vce.c | 76
-rw-r--r--  src/gallium/drivers/radeon/radeon_vce.h | 30
-rw-r--r--  src/gallium/drivers/radeon/radeon_vce_40_2_2.c | 58
-rw-r--r--  src/gallium/drivers/radeon/radeon_vce_50.c | 61
-rw-r--r--  src/gallium/drivers/radeon/radeon_video.c | 20
-rw-r--r--  src/gallium/drivers/radeon/radeon_winsys.h | 71
-rw-r--r--  src/gallium/drivers/radeonsi/Automake.inc | 6
-rw-r--r--  src/gallium/drivers/radeonsi/Makefile.sources | 1
-rw-r--r--  src/gallium/drivers/radeonsi/cik_sdma.c | 2
-rw-r--r--  src/gallium/drivers/radeonsi/si_blit.c | 231
-rw-r--r--  src/gallium/drivers/radeonsi/si_compute.c | 6
-rw-r--r--  src/gallium/drivers/radeonsi/si_cp_dma.c | 265
-rw-r--r--  src/gallium/drivers/radeonsi/si_descriptors.c | 774
-rw-r--r--  src/gallium/drivers/radeonsi/si_hw_context.c | 71
-rw-r--r--  src/gallium/drivers/radeonsi/si_pipe.c | 97
-rw-r--r--  src/gallium/drivers/radeonsi/si_pipe.h | 61
-rw-r--r--  src/gallium/drivers/radeonsi/si_shader.c | 1646
-rw-r--r--  src/gallium/drivers/radeonsi/si_shader.h | 143
-rw-r--r--  src/gallium/drivers/radeonsi/si_state.c | 457
-rw-r--r--  src/gallium/drivers/radeonsi/si_state.h | 76
-rw-r--r--  src/gallium/drivers/radeonsi/si_state_draw.c | 271
-rw-r--r--  src/gallium/drivers/radeonsi/si_state_shaders.c | 664
-rw-r--r--  src/gallium/drivers/radeonsi/sid.h | 2170
-rw-r--r--  src/gallium/drivers/rbug/rbug_context.h | 2
-rw-r--r--  src/gallium/drivers/rbug/rbug_objects.h | 20
-rw-r--r--  src/gallium/drivers/rbug/rbug_screen.c | 12
-rw-r--r--  src/gallium/drivers/rbug/rbug_screen.h | 2
-rw-r--r--  src/gallium/drivers/softpipe/sp_context.h | 2
-rw-r--r--  src/gallium/drivers/softpipe/sp_fence.c | 10
-rw-r--r--  src/gallium/drivers/softpipe/sp_fs_exec.c | 2
-rw-r--r--  src/gallium/drivers/softpipe/sp_prim_vbuf.c | 2
-rw-r--r--  src/gallium/drivers/softpipe/sp_quad_blend.c | 2
-rw-r--r--  src/gallium/drivers/softpipe/sp_quad_fs.c | 4
-rw-r--r--  src/gallium/drivers/softpipe/sp_screen.c | 4
-rw-r--r--  src/gallium/drivers/softpipe/sp_screen.h | 2
-rw-r--r--  src/gallium/drivers/softpipe/sp_setup.c | 10
-rw-r--r--  src/gallium/drivers/softpipe/sp_tex_sample.c | 56
-rw-r--r--  src/gallium/drivers/softpipe/sp_tex_tile_cache.c | 2
-rw-r--r--  src/gallium/drivers/softpipe/sp_tex_tile_cache.h | 4
-rw-r--r--  src/gallium/drivers/softpipe/sp_texture.h | 6
-rw-r--r--  src/gallium/drivers/softpipe/sp_tile_cache.c | 6
-rw-r--r--  src/gallium/drivers/softpipe/sp_tile_cache.h | 4
-rw-r--r--  src/gallium/drivers/svga/Makefile.am | 2
-rw-r--r--  src/gallium/drivers/svga/SConscript | 1
-rw-r--r--  src/gallium/drivers/svga/include/svga3d_shaderdefs.h | 2
-rw-r--r--  src/gallium/drivers/svga/include/svga_overlay.h | 2
-rw-r--r--  src/gallium/drivers/svga/svga_cmd.c | 2
-rw-r--r--  src/gallium/drivers/svga/svga_context.h | 6
-rw-r--r--  src/gallium/drivers/svga/svga_debug.h | 2
-rw-r--r--  src/gallium/drivers/svga/svga_draw_private.h | 2
-rw-r--r--  src/gallium/drivers/svga/svga_pipe_blend.c | 4
-rw-r--r--  src/gallium/drivers/svga/svga_pipe_depthstencil.c | 4
-rw-r--r--  src/gallium/drivers/svga/svga_pipe_query.c | 2
-rw-r--r--  src/gallium/drivers/svga/svga_pipe_sampler.c | 6
-rw-r--r--  src/gallium/drivers/svga/svga_resource_buffer.c | 2
-rw-r--r--  src/gallium/drivers/svga/svga_resource_buffer.h | 12
-rw-r--r--  src/gallium/drivers/svga/svga_resource_texture.h | 16
-rw-r--r--  src/gallium/drivers/svga/svga_sampler_view.h | 2
-rw-r--r--  src/gallium/drivers/svga/svga_screen.c | 21
-rw-r--r--  src/gallium/drivers/svga/svga_screen.h | 2
-rw-r--r--  src/gallium/drivers/svga/svga_screen_cache.c | 2
-rw-r--r--  src/gallium/drivers/svga/svga_shader.h | 2
-rw-r--r--  src/gallium/drivers/svga/svga_state_fs.c | 2
-rw-r--r--  src/gallium/drivers/svga/svga_state_rss.c | 2
-rw-r--r--  src/gallium/drivers/svga/svga_state_tss.c | 2
-rw-r--r--  src/gallium/drivers/svga/svga_state_vs.c | 2
-rw-r--r--  src/gallium/drivers/svga/svga_surface.h | 4
-rw-r--r--  src/gallium/drivers/svga/svga_swtnl_private.h | 2
-rw-r--r--  src/gallium/drivers/svga/svga_tgsi.c | 2
-rw-r--r--  src/gallium/drivers/svga/svga_tgsi.h | 6
-rw-r--r--  src/gallium/drivers/svga/svga_tgsi_emit.h | 20
-rw-r--r--  src/gallium/drivers/svga/svgadump/svga_shader.h | 6
-rw-r--r--  src/gallium/drivers/trace/tr_context.c | 100
-rw-r--r--  src/gallium/drivers/trace/tr_context.h | 2
-rw-r--r--  src/gallium/drivers/trace/tr_dump.c | 24
-rw-r--r--  src/gallium/drivers/trace/tr_dump_defines.h | 4
-rw-r--r--  src/gallium/drivers/trace/tr_screen.c | 24
-rw-r--r--  src/gallium/drivers/trace/tr_texture.h | 8
-rw-r--r--  src/gallium/drivers/vc4/Makefile.am | 4
-rw-r--r--  src/gallium/drivers/vc4/Makefile.sources | 3
-rw-r--r--  src/gallium/drivers/vc4/kernel/vc4_drv.h | 24
-rw-r--r--  src/gallium/drivers/vc4/kernel/vc4_gem.c | 2
-rw-r--r--  src/gallium/drivers/vc4/kernel/vc4_packet.h | 17
-rw-r--r--  src/gallium/drivers/vc4/kernel/vc4_render_cl.c | 56
-rw-r--r--  src/gallium/drivers/vc4/kernel/vc4_validate.c | 315
-rw-r--r--  src/gallium/drivers/vc4/vc4_blit.c | 6
-rw-r--r--  src/gallium/drivers/vc4/vc4_bufmgr.c | 162
-rw-r--r--  src/gallium/drivers/vc4/vc4_bufmgr.h | 9
-rw-r--r--  src/gallium/drivers/vc4/vc4_cl.c | 20
-rw-r--r--  src/gallium/drivers/vc4/vc4_cl.h | 150
-rw-r--r--  src/gallium/drivers/vc4/vc4_cl_dump.c | 146
-rw-r--r--  src/gallium/drivers/vc4/vc4_context.c | 11
-rw-r--r--  src/gallium/drivers/vc4/vc4_context.h | 40
-rw-r--r--  src/gallium/drivers/vc4/vc4_draw.c | 227
-rw-r--r--  src/gallium/drivers/vc4/vc4_drm.h | 25
-rw-r--r--  src/gallium/drivers/vc4/vc4_emit.c | 65
-rw-r--r--  src/gallium/drivers/vc4/vc4_fence.c | 13
-rw-r--r--  src/gallium/drivers/vc4/vc4_formats.c | 2
-rw-r--r--  src/gallium/drivers/vc4/vc4_job.c | 16
-rw-r--r--  src/gallium/drivers/vc4/vc4_nir_lower_blend.c | 431
-rw-r--r--  src/gallium/drivers/vc4/vc4_nir_lower_io.c | 291
-rw-r--r--  src/gallium/drivers/vc4/vc4_opt_copy_propagation.c | 5
-rw-r--r--  src/gallium/drivers/vc4/vc4_opt_cse.c | 19
-rw-r--r--  src/gallium/drivers/vc4/vc4_program.c | 993
-rw-r--r--  src/gallium/drivers/vc4/vc4_qir.c | 18
-rw-r--r--  src/gallium/drivers/vc4/vc4_qir.h | 95
-rw-r--r--  src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c | 6
-rw-r--r--  src/gallium/drivers/vc4/vc4_qpu.h | 30
-rw-r--r--  src/gallium/drivers/vc4/vc4_qpu_disasm.c | 2
-rw-r--r--  src/gallium/drivers/vc4/vc4_qpu_emit.c | 59
-rw-r--r--  src/gallium/drivers/vc4/vc4_qpu_validate.c | 7
-rw-r--r--  src/gallium/drivers/vc4/vc4_register_allocate.c | 89
-rw-r--r--  src/gallium/drivers/vc4/vc4_resource.c | 8
-rw-r--r--  src/gallium/drivers/vc4/vc4_resource.h | 6
-rw-r--r--  src/gallium/drivers/vc4/vc4_screen.c | 10
-rw-r--r--  src/gallium/drivers/vc4/vc4_simulator.c | 16
-rw-r--r--  src/gallium/drivers/vc4/vc4_simulator_validate.h | 1
-rw-r--r--  src/gallium/drivers/vc4/vc4_state.c | 92
-rw-r--r--  src/gallium/drivers/vc4/vc4_tiling.c | 11
-rw-r--r--  src/gallium/drivers/vc4/vc4_tiling.h | 6
-rw-r--r--  src/gallium/drivers/vc4/vc4_uniforms.c | 344
414 files changed, 16038 insertions, 7910 deletions
diff --git a/src/gallium/drivers/freedreno/Android.mk b/src/gallium/drivers/freedreno/Android.mk
index a6712b2c115..ed51835e1fb 100644
--- a/src/gallium/drivers/freedreno/Android.mk
+++ b/src/gallium/drivers/freedreno/Android.mk
@@ -28,7 +28,9 @@ include $(CLEAR_VARS)
LOCAL_SRC_FILES := \
$(C_SOURCES) \
$(a2xx_SOURCES) \
- $(a3xx_SOURCES)
+ $(a3xx_SOURCES) \
+ $(a4xx_SOURCES) \
+ $(ir3_SOURCES)
LOCAL_CFLAGS := \
-Wno-packed-bitfield-compat
@@ -37,6 +39,7 @@ LOCAL_C_INCLUDES := \
$(LOCAL_PATH)/ir3
LOCAL_SHARED_LIBRARIES := libdrm libdrm_freedreno
+LOCAL_STATIC_LIBRARIES := libmesa_glsl
LOCAL_MODULE := libmesa_pipe_freedreno
include $(GALLIUM_COMMON_MK)
diff --git a/src/gallium/drivers/freedreno/Makefile.am b/src/gallium/drivers/freedreno/Makefile.am
index cbf62c6daae..dff95ba5270 100644
--- a/src/gallium/drivers/freedreno/Makefile.am
+++ b/src/gallium/drivers/freedreno/Makefile.am
@@ -1,5 +1,3 @@
-AUTOMAKE_OPTIONS = subdir-objects
-
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
diff --git a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
index f4f6b94c1ea..c4516baf2ec 100644
--- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
+++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
@@ -8,15 +8,15 @@ http://github.com/freedreno/envytools/
git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2013-11-30 14:47:15)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2014-11-13 22:44:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14895 bytes, from 2015-04-19 15:23:28)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-04-12 18:16:35)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 59314 bytes, from 2015-04-19 16:21:40)
-
-Copyright (C) 2013-2014 by the following authors:
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63728 bytes, from 2015-08-05 18:07:28)
+
+Copyright (C) 2013-2015 by the following authors:
- Rob Clark <[email protected]> (robclark)
Permission is hereby granted, free of charge, to any person obtaining
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_blend.h b/src/gallium/drivers/freedreno/a2xx/fd2_blend.h
index 7cafcd3747e..3c8d8f7c09f 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_blend.h
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_blend.h
@@ -39,7 +39,7 @@ struct fd2_blend_stateobj {
uint32_t rb_colormask;
};
-static INLINE struct fd2_blend_stateobj *
+static inline struct fd2_blend_stateobj *
fd2_blend_stateobj(struct pipe_blend_state *blend)
{
return (struct fd2_blend_stateobj *)blend;
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_context.c b/src/gallium/drivers/freedreno/a2xx/fd2_context.c
index a0bf01ffd1f..6089ebc1516 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_context.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_context.c
@@ -67,7 +67,7 @@ create_solid_vertexbuf(struct pipe_context *pctx)
}
static const uint8_t a22x_primtypes[PIPE_PRIM_MAX] = {
- [PIPE_PRIM_POINTS] = DI_PT_POINTLIST_A2XX,
+ [PIPE_PRIM_POINTS] = DI_PT_POINTLIST_PSIZE,
[PIPE_PRIM_LINES] = DI_PT_LINELIST,
[PIPE_PRIM_LINE_STRIP] = DI_PT_LINESTRIP,
[PIPE_PRIM_LINE_LOOP] = DI_PT_LINELOOP,
@@ -77,7 +77,7 @@ static const uint8_t a22x_primtypes[PIPE_PRIM_MAX] = {
};
static const uint8_t a20x_primtypes[PIPE_PRIM_MAX] = {
- [PIPE_PRIM_POINTS] = DI_PT_POINTLIST_A2XX,
+ [PIPE_PRIM_POINTS] = DI_PT_POINTLIST_PSIZE,
[PIPE_PRIM_LINES] = DI_PT_LINELIST,
[PIPE_PRIM_LINE_STRIP] = DI_PT_LINESTRIP,
[PIPE_PRIM_TRIANGLES] = DI_PT_TRILIST,
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_context.h b/src/gallium/drivers/freedreno/a2xx/fd2_context.h
index de845f07a85..74147107930 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_context.h
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_context.h
@@ -40,7 +40,7 @@ struct fd2_context {
struct pipe_resource *solid_vertexbuf;
};
-static INLINE struct fd2_context *
+static inline struct fd2_context *
fd2_context(struct fd_context *ctx)
{
return (struct fd2_context *)ctx;
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_rasterizer.h b/src/gallium/drivers/freedreno/a2xx/fd2_rasterizer.h
index adc0653132b..9e53cd3be75 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_rasterizer.h
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_rasterizer.h
@@ -43,7 +43,7 @@ struct fd2_rasterizer_stateobj {
uint32_t pa_su_sc_mode_cntl;
};
-static INLINE struct fd2_rasterizer_stateobj *
+static inline struct fd2_rasterizer_stateobj *
fd2_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
{
return (struct fd2_rasterizer_stateobj *)rast;
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_texture.h b/src/gallium/drivers/freedreno/a2xx/fd2_texture.h
index 4fffa08b3c3..5c9236851bd 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_texture.h
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_texture.h
@@ -42,7 +42,7 @@ struct fd2_sampler_stateobj {
uint32_t tex0, tex3, tex4, tex5;
};
-static INLINE struct fd2_sampler_stateobj *
+static inline struct fd2_sampler_stateobj *
fd2_sampler_stateobj(struct pipe_sampler_state *samp)
{
return (struct fd2_sampler_stateobj *)samp;
@@ -54,7 +54,7 @@ struct fd2_pipe_sampler_view {
uint32_t tex0, tex2, tex3;
};
-static INLINE struct fd2_pipe_sampler_view *
+static inline struct fd2_pipe_sampler_view *
fd2_pipe_sampler_view(struct pipe_sampler_view *pview)
{
return (struct fd2_pipe_sampler_view *)pview;
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_zsa.h b/src/gallium/drivers/freedreno/a2xx/fd2_zsa.h
index dda1e552174..15609ad0267 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_zsa.h
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_zsa.h
@@ -44,7 +44,7 @@ struct fd2_zsa_stateobj {
uint32_t rb_stencilrefmask_bf;
};
-static INLINE struct fd2_zsa_stateobj *
+static inline struct fd2_zsa_stateobj *
fd2_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa)
{
return (struct fd2_zsa_stateobj *)zsa;
diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
index a3bc74eda85..8e8cf6a03f2 100644
--- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
+++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
@@ -8,13 +8,13 @@ http://github.com/freedreno/envytools/
git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2013-11-30 14:47:15)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2014-11-13 22:44:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14895 bytes, from 2015-04-19 15:23:28)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-04-12 18:16:35)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 59314 bytes, from 2015-04-19 16:21:40)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63728 bytes, from 2015-08-05 18:07:28)
Copyright (C) 2013-2015 by the following authors:
- Rob Clark <[email protected]> (robclark)
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_blend.h b/src/gallium/drivers/freedreno/a3xx/fd3_blend.h
index 4f6eeb74481..142df7c300f 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_blend.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_blend.h
@@ -32,6 +32,8 @@
#include "pipe/p_state.h"
#include "pipe/p_context.h"
+#include "freedreno_util.h"
+
struct fd3_blend_stateobj {
struct pipe_blend_state base;
struct {
@@ -42,10 +44,10 @@ struct fd3_blend_stateobj {
/* Blend control bits for alpha channel */
uint32_t blend_control_alpha;
uint32_t control;
- } rb_mrt[4];
+ } rb_mrt[A3XX_MAX_RENDER_TARGETS];
};
-static INLINE struct fd3_blend_stateobj *
+static inline struct fd3_blend_stateobj *
fd3_blend_stateobj(struct pipe_blend_state *blend)
{
return (struct fd3_blend_stateobj *)blend;
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.c b/src/gallium/drivers/freedreno/a3xx/fd3_context.c
index 7e5a99ea571..dc33783e398 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_context.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.c
@@ -88,7 +88,7 @@ create_blit_texcoord_vertexbuf(struct pipe_context *pctx)
}
static const uint8_t primtypes[PIPE_PRIM_MAX] = {
- [PIPE_PRIM_POINTS] = DI_PT_POINTLIST_A3XX,
+ [PIPE_PRIM_POINTS] = DI_PT_POINTLIST,
[PIPE_PRIM_LINES] = DI_PT_LINELIST,
[PIPE_PRIM_LINE_STRIP] = DI_PT_LINESTRIP,
[PIPE_PRIM_LINE_LOOP] = DI_PT_LINELOOP,
@@ -121,6 +121,7 @@ fd3_context_create(struct pipe_screen *pscreen, void *priv)
fd3_gmem_init(pctx);
fd3_texture_init(pctx);
fd3_prog_init(pctx);
+ fd3_emit_init(pctx);
pctx = fd_context_init(&fd3_ctx->base, pscreen, primtypes, priv);
if (!pctx)
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.h b/src/gallium/drivers/freedreno/a3xx/fd3_context.h
index 77e4605e550..6e20b2ff9bc 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_context.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.h
@@ -112,7 +112,7 @@ struct fd3_context {
struct ir3_shader_key last_key;
};
-static INLINE struct fd3_context *
+static inline struct fd3_context *
fd3_context(struct fd_context *ctx)
{
return (struct fd3_context *)ctx;
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
index b5838b58eb2..a9498835011 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
@@ -60,6 +60,9 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
const struct pipe_draw_info *info = emit->info;
enum pc_di_primtype primtype = ctx->primtypes[info->mode];
+ if (!(fd3_emit_get_vp(emit) && fd3_emit_get_fp(emit)))
+ return;
+
fd3_emit_state(ctx, ring, emit);
if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE))
@@ -79,8 +82,8 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
info->restart_index : 0xffffffff);
if (ctx->rasterizer && ctx->rasterizer->point_size_per_vertex &&
- info->mode == PIPE_PRIM_POINTS)
- primtype = DI_PT_POINTLIST_A2XX;
+ (info->mode == PIPE_PRIM_POINTS))
+ primtype = DI_PT_POINTLIST_PSIZE;
fd_draw_emit(ctx, ring,
primtype,
@@ -240,10 +243,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
.vtx = &fd3_ctx->solid_vbuf_state,
.prog = &ctx->solid_prog,
.key = {
- .half_precision = (fd3_half_precision(pfb->cbufs[0]) &&
- fd3_half_precision(pfb->cbufs[1]) &&
- fd3_half_precision(pfb->cbufs[2]) &&
- fd3_half_precision(pfb->cbufs[3])),
+ .half_precision = fd_half_precision(pfb),
},
};
@@ -321,7 +321,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
}
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < A3XX_MAX_RENDER_TARGETS; i++) {
OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_ALWAYS) |
@@ -342,7 +342,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
fd3_emit_vertex_bufs(ring, &emit);
- fd3_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);
+ fd3_emit_const(ring, SHADER_FRAGMENT, 0, 0, 4, color->ui, NULL);
OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 07cc2266d08..752e7f88cb9 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -43,19 +43,26 @@
#include "fd3_format.h"
#include "fd3_zsa.h"
+static const enum adreno_state_block sb[] = {
+ [SHADER_VERTEX] = SB_VERT_SHADER,
+ [SHADER_FRAGMENT] = SB_FRAG_SHADER,
+};
+
/* regid: base const register
* prsc or dwords: buffer containing constant values
* sizedwords: size of const value buffer
*/
void
-fd3_emit_constant(struct fd_ringbuffer *ring,
- enum adreno_state_block sb,
+fd3_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
uint32_t regid, uint32_t offset, uint32_t sizedwords,
const uint32_t *dwords, struct pipe_resource *prsc)
{
uint32_t i, sz;
enum adreno_state_src src;
+ debug_assert((regid % 4) == 0);
+ debug_assert((sizedwords % 4) == 0);
+
if (prsc) {
sz = 0;
src = SS_INDIRECT;
@@ -67,7 +74,7 @@ fd3_emit_constant(struct fd_ringbuffer *ring,
OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) |
CP_LOAD_STATE_0_STATE_SRC(src) |
- CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
CP_LOAD_STATE_0_NUM_UNIT(sizedwords/2));
if (prsc) {
struct fd_bo *bo = fd_resource(prsc)->bo;
@@ -84,89 +91,31 @@ fd3_emit_constant(struct fd_ringbuffer *ring,
}
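
A minimal direct-upload call, mirroring how the clear path later in this diff uses the renamed helper (a sketch; ring is the ringbuffer being built, and both regid and sizedwords must be multiples of 4 to satisfy the debug_asserts above):

	/* load one vec4 of immediate data into FS const c0 (SS_DIRECT): */
	static const uint32_t color[4] = { 0, 0, 0, 0 };
	fd3_emit_const(ring, SHADER_FRAGMENT, 0, 0, 4, color, NULL);
	/* passing a pipe_resource as the last argument instead selects
	 * SS_INDIRECT and emits a reloc to the resource's bo rather than
	 * inline dwords */
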
static void
-emit_constants(struct fd_ringbuffer *ring,
- enum adreno_state_block sb,
- struct fd_constbuf_stateobj *constbuf,
- struct ir3_shader_variant *shader,
- bool emit_immediates)
+fd3_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write,
+ uint32_t regid, uint32_t num, struct fd_bo **bos, uint32_t *offsets)
{
- uint32_t enabled_mask = constbuf->enabled_mask;
- uint32_t max_const;
- int i;
-
- // XXX TODO only emit dirty consts.. but we need to keep track if
- // they are clobbered by a clear, gmem2mem, or mem2gmem..
- constbuf->dirty_mask = enabled_mask;
-
- /* in particular, with binning shader we may end up with unused
- * consts, ie. we could end up w/ constlen that is smaller
- * than first_immediate. In that case truncate the user consts
- * early to avoid HLSQ lockup caused by writing too many consts
- */
- max_const = MIN2(shader->first_driver_param, shader->constlen);
-
- /* emit user constants: */
- if (enabled_mask & 1) {
- const unsigned index = 0;
- struct pipe_constant_buffer *cb = &constbuf->cb[index];
- unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
+ uint32_t i;
- // I expect that size should be a multiple of vec4's:
- assert(size == align(size, 4));
+ debug_assert((regid % 4) == 0);
+ debug_assert((num % 4) == 0);
- /* and even if the start of the const buffer is before
- * first_immediate, the end may not be:
- */
- size = MIN2(size, 4 * max_const);
-
- if (size && constbuf->dirty_mask & (1 << index)) {
- fd3_emit_constant(ring, sb, 0,
- cb->buffer_offset, size,
- cb->user_buffer, cb->buffer);
- constbuf->dirty_mask &= ~(1 << index);
- }
-
- enabled_mask &= ~(1 << index);
- }
-
- if (shader->constlen > shader->first_driver_param) {
- uint32_t params = MIN2(4, shader->constlen - shader->first_driver_param);
- /* emit ubos: */
- OUT_PKT3(ring, CP_LOAD_STATE, 2 + params * 4);
- OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(shader->first_driver_param * 2) |
- CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
- CP_LOAD_STATE_0_STATE_BLOCK(sb) |
- CP_LOAD_STATE_0_NUM_UNIT(params * 2));
- OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
- CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
-
- for (i = 1; i <= params * 4; i++) {
- struct pipe_constant_buffer *cb = &constbuf->cb[i];
- assert(!cb->user_buffer);
- if ((enabled_mask & (1 << i)) && cb->buffer)
- OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset, 0, 0);
- else
- OUT_RING(ring, 0xbad00000 | ((i - 1) << 16));
- }
- }
-
- /* emit shader immediates: */
- if (shader && emit_immediates) {
- int size = shader->immediates_count;
- uint32_t base = shader->first_immediate;
-
- /* truncate size to avoid writing constants that shader
- * does not use:
- */
- size = MIN2(size + base, shader->constlen) - base;
-
- /* convert out of vec4: */
- base *= 4;
- size *= 4;
-
- if (size > 0) {
- fd3_emit_constant(ring, sb, base,
- 0, size, shader->immediates[0].val, NULL);
+ OUT_PKT3(ring, CP_LOAD_STATE, 2 + num);
+ OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) |
+ CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+ CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
+ CP_LOAD_STATE_0_NUM_UNIT(num/2));
+ OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+ CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
+
+ for (i = 0; i < num; i++) {
+ if (bos[i]) {
+ if (write) {
+ OUT_RELOCW(ring, bos[i], offsets[i], 0, 0);
+ } else {
+ OUT_RELOC(ring, bos[i], offsets[i], 0, 0);
+ }
+ } else {
+ OUT_RING(ring, 0xbad00000 | (i << 16));
}
}
}
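
The new fd3_emit_const_bo() variant loads buffer addresses rather than inline data; a hedged usage sketch, where ubo_bo and base_regid are hypothetical stand-ins (num, like regid, must be a multiple of 4):

	struct fd_bo *bos[4] = { ubo_bo, NULL, NULL, NULL };
	uint32_t offsets[4] = { 0, 0, 0, 0 };
	/* NULL slots become the 0xbad00000 debug marker seen above: */
	fd3_emit_const_bo(ring, SHADER_VERTEX, false, base_regid, 4, bos, offsets);
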
@@ -302,14 +251,15 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
for (i = 0; i < tex->num_textures; i++) {
static const struct fd3_pipe_sampler_view dummy_view = {
+ .base.target = PIPE_TEXTURE_1D, /* anything !PIPE_BUFFER */
.base.u.tex.first_level = 1,
};
const struct fd3_pipe_sampler_view *view = tex->textures[i] ?
fd3_pipe_sampler_view(tex->textures[i]) :
&dummy_view;
struct fd_resource *rsc = fd_resource(view->base.texture);
- unsigned start = view->base.u.tex.first_level;
- unsigned end = view->base.u.tex.last_level;
+ unsigned start = fd_sampler_first_level(&view->base);
+ unsigned end = fd_sampler_last_level(&view->base);
for (j = 0; j < (end - start + 1); j++) {
struct fd_resource_slice *slice =
@@ -392,6 +342,7 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring,
format = fd3_gmem_restore_format(rsc->base.b.format);
}
+ /* note: PIPE_BUFFER disallowed for surfaces */
unsigned lvl = psurf[i]->u.tex.level;
struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl);
@@ -444,7 +395,9 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
uint32_t total_in = 0;
const struct fd_vertex_state *vtx = emit->vtx;
struct ir3_shader_variant *vp = fd3_emit_get_vp(emit);
- unsigned vertex_regid = regid(63, 0), instance_regid = regid(63, 0);
+ unsigned vertex_regid = regid(63, 0);
+ unsigned instance_regid = regid(63, 0);
+ unsigned vtxcnt_regid = regid(63, 0);
for (i = 0; i < vp->inputs_count; i++) {
uint8_t semantic = sem2name(vp->inputs[i].semantic);
@@ -452,14 +405,17 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
vertex_regid = vp->inputs[i].regid;
else if (semantic == TGSI_SEMANTIC_INSTANCEID)
instance_regid = vp->inputs[i].regid;
+ else if (semantic == IR3_SEMANTIC_VTXCNT)
+ vtxcnt_regid = vp->inputs[i].regid;
else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask)
last = i;
}
/* hw doesn't like to be configured for zero vbo's, it seems: */
- if (vtx->vtx->num_elements == 0 &&
- vertex_regid == regid(63, 0) &&
- instance_regid == regid(63, 0))
+ if ((vtx->vtx->num_elements == 0) &&
+ (vertex_regid == regid(63, 0)) &&
+ (instance_regid == regid(63, 0)) &&
+ (vtxcnt_regid == regid(63, 0)))
return;
for (i = 0, j = 0; i <= last; i++) {
@@ -472,8 +428,9 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
enum pipe_format pfmt = elem->src_format;
enum a3xx_vtx_fmt fmt = fd3_pipe2vtx(pfmt);
bool switchnext = (i != last) ||
- vertex_regid != regid(63, 0) ||
- instance_regid != regid(63, 0);
+ (vertex_regid != regid(63, 0)) ||
+ (instance_regid != regid(63, 0)) ||
+ (vtxcnt_regid != regid(63, 0));
bool isint = util_format_is_pure_integer(pfmt);
uint32_t fs = util_format_get_blocksize(pfmt);
@@ -512,6 +469,10 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
OUT_RING(ring, A3XX_VFD_CONTROL_1_MAXSTORAGE(1) | // XXX
A3XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
A3XX_VFD_CONTROL_1_REGID4INST(instance_regid));
+
+ OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1);
+ OUT_RING(ring, A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(15) |
+ A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT(vtxcnt_regid));
}
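
In this hunk, regid(63, 0) serves as the "no register assigned" sentinel. Assuming the usual ir3 encoding from instr-a3xx.h, the register number sits in the upper bits and the component in the low two bits, so the sentinel is the value 252; that is why the hardcoded REGID_VTXCNT(252) removed from fd3_program.c below is interchangeable with the vtxcnt_regid default here:

	/* assumed ir3 packing (see instr-a3xx.h): */
	#define regid(num, comp) ((((num) & 0x3f) << 2) | ((comp) & 0x3))
	/* regid(63, 0) == 252, the highest encodable register,
	 * reserved to mean "input not used" */
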
void
@@ -669,33 +630,12 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_PKT3(ring, CP_EVENT_WRITE, 1);
OUT_RING(ring, HLSQ_FLUSH);
- if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) &&
- /* evil hack to deal sanely with clear path: */
- (emit->prog == &ctx->prog)) {
- fd_wfi(ctx, ring);
- emit_constants(ring, SB_VERT_SHADER,
- &ctx->constbuf[PIPE_SHADER_VERTEX],
- vp, emit->prog->dirty & FD_SHADER_DIRTY_VP);
- if (!emit->key.binning_pass) {
- emit_constants(ring, SB_FRAG_SHADER,
- &ctx->constbuf[PIPE_SHADER_FRAGMENT],
- fp, emit->prog->dirty & FD_SHADER_DIRTY_FP);
- }
- }
-
- /* emit driver params every time */
- if (emit->info && emit->prog == &ctx->prog) {
- uint32_t vertex_params[4] = {
- emit->info->indexed ? emit->info->index_bias : emit->info->start,
- 0,
- 0,
- 0
- };
- if (vp->constlen >= vp->first_driver_param + 4) {
- fd3_emit_constant(ring, SB_VERT_SHADER,
- (vp->first_driver_param + 4) * 4,
- 0, 4, vertex_params, NULL);
- }
+ if (emit->prog == &ctx->prog) { /* evil hack to deal sanely with clear path */
+ ir3_emit_consts(vp, ring, emit->info, dirty);
+ if (!emit->key.binning_pass)
+ ir3_emit_consts(fp, ring, emit->info, dirty);
+ /* mark clean after emitting consts: */
+ ctx->prog.dirty = 0;
}
if ((dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) && ctx->blend) {
@@ -930,3 +870,11 @@ fd3_emit_restore(struct fd_context *ctx)
ctx->needs_rb_fbd = true;
}
+
+void
+fd3_emit_init(struct pipe_context *pctx)
+{
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->emit_const = fd3_emit_const;
+ ctx->emit_const_bo = fd3_emit_const_bo;
+}
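
With these hooks installed, generation-independent core code can emit constants without knowing the a3xx packet encoding; a sketch of a call through the context vtable (assuming a struct fd_context *ctx with its current ringbuffer in scope):

	/* resolves to fd3_emit_const on a3xx, and to the a4xx
	 * equivalent once fd4 installs its own hook: */
	ctx->emit_const(ctx->ring, SHADER_FRAGMENT, 0, 0, 4, color, NULL);
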
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
index 8f21919c9a7..795654706a7 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
@@ -37,10 +37,8 @@
#include "ir3_shader.h"
struct fd_ringbuffer;
-enum adreno_state_block;
-void fd3_emit_constant(struct fd_ringbuffer *ring,
- enum adreno_state_block sb,
+void fd3_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
uint32_t regid, uint32_t offset, uint32_t sizedwords,
const uint32_t *dwords, struct pipe_resource *prsc);
@@ -90,4 +88,6 @@ void fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
void fd3_emit_restore(struct fd_context *ctx);
+void fd3_emit_init(struct pipe_context *pctx);
+
#endif /* FD3_EMIT_H */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.h b/src/gallium/drivers/freedreno/a3xx/fd3_format.h
index 6afc3015901..05c5ea3d247 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_format.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.h
@@ -41,27 +41,4 @@ enum a3xx_color_swap fd3_pipe2swap(enum pipe_format format);
uint32_t fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r,
unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a);
-static INLINE bool
-fd3_half_precision(const struct pipe_surface *surface)
-{
- enum pipe_format format;
- if (!surface)
- return true;
-
- format = surface->format;
-
- /* colors are provided in consts, which go through cov.f32f16, which will
- * break these values
- */
- if (util_format_is_pure_integer(format))
- return false;
-
- /* avoid losing precision on 32-bit float formats */
- if (util_format_is_float(format) &&
- util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0) == 32)
- return false;
-
- return true;
-}
-
#endif /* FD3_FORMAT_H_ */
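
The deleted per-surface helper is superseded by a shared fd_half_precision(pfb) (added to freedreno_util.h elsewhere in this series, not shown in this section); presumably it applies the same per-surface test across every bound color buffer. A minimal sketch under that assumption:

	static inline bool
	fd_half_precision(struct pipe_framebuffer_state *pfb)
	{
		unsigned i;
		for (i = 0; i < pfb->nr_cbufs; i++) {
			const struct pipe_surface *psurf = pfb->cbufs[i];
			if (!psurf)
				continue;
			/* same rules as the deleted fd3_half_precision():
			 * consts pass through cov.f32f16, which breaks
			 * pure-integer values and loses 32-bit float bits */
			if (util_format_is_pure_integer(psurf->format))
				return false;
			if (util_format_is_float(psurf->format) &&
			    util_format_get_component_bits(psurf->format,
					UTIL_FORMAT_COLORSPACE_RGB, 0) == 32)
				return false;
		}
		return true;
	}
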
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
index 7d3975761dd..9a5b45e2fcb 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
@@ -57,7 +57,7 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
tile_mode = LINEAR;
}
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < A3XX_MAX_RENDER_TARGETS; i++) {
enum pipe_format pformat = 0;
enum a3xx_color_fmt format = 0;
enum a3xx_color_swap swap = WZYX;
@@ -537,10 +537,7 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
/* NOTE: They all use the same VP, this is for vtx bufs. */
.prog = &ctx->blit_prog[0],
.key = {
- .half_precision = (fd3_half_precision(pfb->cbufs[0]) &&
- fd3_half_precision(pfb->cbufs[1]) &&
- fd3_half_precision(pfb->cbufs[2]) &&
- fd3_half_precision(pfb->cbufs[3]))
+ .half_precision = fd_half_precision(pfb),
},
};
float x0, y0, x1, y1;
@@ -654,6 +651,7 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR)) {
emit.prog = &ctx->blit_prog[pfb->nr_cbufs - 1];
+ emit.fp = NULL; /* frag shader changed so clear cache */
fd3_program_emit(ring, &emit, pfb->nr_cbufs, pfb->cbufs);
emit_mem2gmem_surf(ctx, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs, bin_w);
}
@@ -674,6 +672,7 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
emit.prog = &ctx->blit_zs;
emit.key.half_precision = false;
}
+ emit.fp = NULL; /* frag shader changed so clear cache */
fd3_program_emit(ring, &emit, 1, &pfb->zsbuf);
emit_mem2gmem_surf(ctx, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w);
}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index 57fcaa9020e..b5360797745 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -51,7 +51,7 @@ create_shader_stateobj(struct pipe_context *pctx, const struct pipe_shader_state
enum shader_t type)
{
struct fd3_shader_stateobj *so = CALLOC_STRUCT(fd3_shader_stateobj);
- so->shader = ir3_shader_create(pctx, cso->tokens, type);
+ so->shader = ir3_shader_create(pctx, cso, type);
return so;
}
@@ -136,6 +136,8 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
int constmode;
int i, j, k;
+ debug_assert(nr <= ARRAY_SIZE(color_regid));
+
vp = fd3_emit_get_vp(emit);
if (emit->key.binning_pass) {
@@ -202,12 +204,12 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
ir3_find_output_regid(fp, ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
} else {
- for (int i = 0; i < fp->outputs_count; i++) {
+ for (i = 0; i < fp->outputs_count; i++) {
ir3_semantic sem = fp->outputs[i].semantic;
unsigned idx = sem2idx(sem);
if (sem2name(sem) != TGSI_SEMANTIC_COLOR)
continue;
- assert(idx < 4);
+ debug_assert(idx < ARRAY_SIZE(color_regid));
color_regid[idx] = fp->outputs[i].regid;
}
}
@@ -449,10 +451,6 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
OUT_RING(ring, flatshade[1]); /* SP_FS_FLAT_SHAD_MODE_REG_1 */
}
- OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1);
- OUT_RING(ring, A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(15) |
- A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT(252));
-
if (vpbuffer == BUFFER)
emit_shader(ring, vp);
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_query.c b/src/gallium/drivers/freedreno/a3xx/fd3_query.c
index 7abab543427..8fc0a0d4229 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_query.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_query.c
@@ -64,7 +64,7 @@ occlusion_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
OUT_PKT3(ring, CP_DRAW_INDX, 3);
OUT_RING(ring, 0x00000000);
- OUT_RING(ring, DRAW(DI_PT_POINTLIST_A2XX, DI_SRC_SEL_AUTO_INDEX,
+ OUT_RING(ring, DRAW(DI_PT_POINTLIST_PSIZE, DI_SRC_SEL_AUTO_INDEX,
INDEX_SIZE_IGN, USE_VISIBILITY, 0));
OUT_RING(ring, 0); /* NumIndices */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.h b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.h
index 7e9c1f51f59..765d9719524 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.h
@@ -44,7 +44,7 @@ struct fd3_rasterizer_stateobj {
uint32_t pc_prim_vtx_cntl;
};
-static INLINE struct fd3_rasterizer_stateobj *
+static inline struct fd3_rasterizer_stateobj *
fd3_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
{
return (struct fd3_rasterizer_stateobj *)rast;
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_screen.c b/src/gallium/drivers/freedreno/a3xx/fd3_screen.c
index 094dcf376e5..722fe360202 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_screen.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_screen.c
@@ -105,7 +105,7 @@ void
fd3_screen_init(struct pipe_screen *pscreen)
{
struct fd_screen *screen = fd_screen(pscreen);
- screen->max_rts = 4;
+ screen->max_rts = A3XX_MAX_RENDER_TARGETS;
screen->compiler = ir3_compiler_create(screen->gpu_id);
pscreen->context_create = fd3_context_create;
pscreen->is_format_supported = fd3_screen_is_format_supported;
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
index a278bf5c603..c30658d0e7b 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
@@ -210,8 +210,8 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
{
struct fd3_pipe_sampler_view *so = CALLOC_STRUCT(fd3_pipe_sampler_view);
struct fd_resource *rsc = fd_resource(prsc);
- unsigned lvl = cso->u.tex.first_level;
- unsigned miplevels = cso->u.tex.last_level - lvl;
+ unsigned lvl = fd_sampler_first_level(cso);
+ unsigned miplevels = fd_sampler_last_level(cso) - lvl;
uint32_t sz2 = 0;
if (!so)
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.h b/src/gallium/drivers/freedreno/a3xx/fd3_texture.h
index c38fd847f27..d5afb03cd7a 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.h
@@ -43,7 +43,7 @@ struct fd3_sampler_stateobj {
bool saturate_s, saturate_t, saturate_r;
};
-static INLINE struct fd3_sampler_stateobj *
+static inline struct fd3_sampler_stateobj *
fd3_sampler_stateobj(struct pipe_sampler_state *samp)
{
return (struct fd3_sampler_stateobj *)samp;
@@ -54,7 +54,7 @@ struct fd3_pipe_sampler_view {
uint32_t texconst0, texconst1, texconst2, texconst3;
};
-static INLINE struct fd3_pipe_sampler_view *
+static inline struct fd3_pipe_sampler_view *
fd3_pipe_sampler_view(struct pipe_sampler_view *pview)
{
return (struct fd3_pipe_sampler_view *)pview;
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_zsa.h b/src/gallium/drivers/freedreno/a3xx/fd3_zsa.h
index 352c3dd5432..d4dc5954da5 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_zsa.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_zsa.h
@@ -45,7 +45,7 @@ struct fd3_zsa_stateobj {
uint32_t rb_stencilrefmask_bf;
};
-static INLINE struct fd3_zsa_stateobj *
+static inline struct fd3_zsa_stateobj *
fd3_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa)
{
return (struct fd3_zsa_stateobj *)zsa;
diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index 0e7d3cf6db1..563f70ac5eb 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -8,13 +8,13 @@ http://github.com/freedreno/envytools/
git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2013-11-30 14:47:15)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2014-11-13 22:44:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14895 bytes, from 2015-04-19 15:23:28)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-04-12 18:16:35)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 59314 bytes, from 2015-04-19 16:21:40)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63728 bytes, from 2015-08-05 18:07:28)
Copyright (C) 2013-2015 by the following authors:
- Rob Clark <[email protected]> (robclark)
@@ -227,6 +227,7 @@ enum a4xx_depth_format {
DEPTH4_NONE = 0,
DEPTH4_16 = 1,
DEPTH4_24_8 = 2,
+ DEPTH4_32 = 3,
};
enum a4xx_tess_spacing {
@@ -570,6 +571,15 @@ static inline uint32_t A4XX_RB_FS_OUTPUT_SAMPLE_MASK(uint32_t val)
return ((val) << A4XX_RB_FS_OUTPUT_SAMPLE_MASK__SHIFT) & A4XX_RB_FS_OUTPUT_SAMPLE_MASK__MASK;
}
+#define REG_A4XX_RB_SAMPLE_COUNT_CONTROL 0x000020fa
+#define A4XX_RB_SAMPLE_COUNT_CONTROL_COPY 0x00000002
+#define A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR__MASK 0xfffffffc
+#define A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR__SHIFT 2
+static inline uint32_t A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR(uint32_t val)
+{
+ return ((val >> 2) << A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR__SHIFT) & A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR__MASK;
+}
+
#define REG_A4XX_RB_RENDER_COMPONENTS 0x000020fb
#define A4XX_RB_RENDER_COMPONENTS_RT0__MASK 0x0000000f
#define A4XX_RB_RENDER_COMPONENTS_RT0__SHIFT 0
@@ -811,6 +821,23 @@ static inline uint32_t A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(enum adreno_stencil_op v
#define REG_A4XX_RB_STENCIL_CONTROL2 0x00002107
#define A4XX_RB_STENCIL_CONTROL2_STENCIL_BUFFER 0x00000001
+#define REG_A4XX_RB_STENCIL_INFO 0x00002108
+#define A4XX_RB_STENCIL_INFO_SEPARATE_STENCIL 0x00000001
+#define A4XX_RB_STENCIL_INFO_STENCIL_BASE__MASK 0xfffff000
+#define A4XX_RB_STENCIL_INFO_STENCIL_BASE__SHIFT 12
+static inline uint32_t A4XX_RB_STENCIL_INFO_STENCIL_BASE(uint32_t val)
+{
+ return ((val >> 12) << A4XX_RB_STENCIL_INFO_STENCIL_BASE__SHIFT) & A4XX_RB_STENCIL_INFO_STENCIL_BASE__MASK;
+}
+
+#define REG_A4XX_RB_STENCIL_PITCH 0x00002109
+#define A4XX_RB_STENCIL_PITCH__MASK 0xffffffff
+#define A4XX_RB_STENCIL_PITCH__SHIFT 0
+static inline uint32_t A4XX_RB_STENCIL_PITCH(uint32_t val)
+{
+ return ((val >> 5) << A4XX_RB_STENCIL_PITCH__SHIFT) & A4XX_RB_STENCIL_PITCH__MASK;
+}
+
#define REG_A4XX_RB_STENCILREFMASK 0x0000210b
#define A4XX_RB_STENCILREFMASK_STENCILREF__MASK 0x000000ff
#define A4XX_RB_STENCILREFMASK_STENCILREF__SHIFT 0
@@ -1167,6 +1194,8 @@ static inline uint32_t REG_A4XX_CP_SCRATCH_REG(uint32_t i0) { return 0x00000578
#define REG_A4XX_SP_VS_STATUS 0x00000ec0
+#define REG_A4XX_SP_MODE_CONTROL 0x00000ec3
+
#define REG_A4XX_SP_PERFCTR_SP_SEL_11 0x00000ecf
#define REG_A4XX_SP_SP_CTRL_REG 0x000022c0
@@ -1432,6 +1461,20 @@ static inline uint32_t A4XX_SP_FS_MRT_REG_MRTFORMAT(enum a4xx_color_fmt val)
return ((val) << A4XX_SP_FS_MRT_REG_MRTFORMAT__SHIFT) & A4XX_SP_FS_MRT_REG_MRTFORMAT__MASK;
}
+#define REG_A4XX_SP_CS_CTRL_REG0 0x00002300
+
+#define REG_A4XX_SP_CS_OBJ_OFFSET_REG 0x00002301
+
+#define REG_A4XX_SP_CS_OBJ_START 0x00002302
+
+#define REG_A4XX_SP_CS_PVT_MEM_PARAM 0x00002303
+
+#define REG_A4XX_SP_CS_PVT_MEM_ADDR 0x00002304
+
+#define REG_A4XX_SP_CS_PVT_MEM_SIZE 0x00002305
+
+#define REG_A4XX_SP_CS_LENGTH_REG 0x00002306
+
#define REG_A4XX_SP_HS_OBJ_OFFSET_REG 0x0000230d
#define A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000
#define A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16
@@ -1454,6 +1497,76 @@ static inline uint32_t A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val)
#define REG_A4XX_SP_HS_LENGTH_REG 0x00002312
+#define REG_A4XX_SP_DS_PARAM_REG 0x0000231a
+#define A4XX_SP_DS_PARAM_REG_POSREGID__MASK 0x000000ff
+#define A4XX_SP_DS_PARAM_REG_POSREGID__SHIFT 0
+static inline uint32_t A4XX_SP_DS_PARAM_REG_POSREGID(uint32_t val)
+{
+ return ((val) << A4XX_SP_DS_PARAM_REG_POSREGID__SHIFT) & A4XX_SP_DS_PARAM_REG_POSREGID__MASK;
+}
+#define A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR__MASK 0xfff00000
+#define A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR__SHIFT 20
+static inline uint32_t A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR(uint32_t val)
+{
+ return ((val) << A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR__SHIFT) & A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR__MASK;
+}
+
+static inline uint32_t REG_A4XX_SP_DS_OUT(uint32_t i0) { return 0x0000231b + 0x1*i0; }
+
+static inline uint32_t REG_A4XX_SP_DS_OUT_REG(uint32_t i0) { return 0x0000231b + 0x1*i0; }
+#define A4XX_SP_DS_OUT_REG_A_REGID__MASK 0x000001ff
+#define A4XX_SP_DS_OUT_REG_A_REGID__SHIFT 0
+static inline uint32_t A4XX_SP_DS_OUT_REG_A_REGID(uint32_t val)
+{
+ return ((val) << A4XX_SP_DS_OUT_REG_A_REGID__SHIFT) & A4XX_SP_DS_OUT_REG_A_REGID__MASK;
+}
+#define A4XX_SP_DS_OUT_REG_A_COMPMASK__MASK 0x00001e00
+#define A4XX_SP_DS_OUT_REG_A_COMPMASK__SHIFT 9
+static inline uint32_t A4XX_SP_DS_OUT_REG_A_COMPMASK(uint32_t val)
+{
+ return ((val) << A4XX_SP_DS_OUT_REG_A_COMPMASK__SHIFT) & A4XX_SP_DS_OUT_REG_A_COMPMASK__MASK;
+}
+#define A4XX_SP_DS_OUT_REG_B_REGID__MASK 0x01ff0000
+#define A4XX_SP_DS_OUT_REG_B_REGID__SHIFT 16
+static inline uint32_t A4XX_SP_DS_OUT_REG_B_REGID(uint32_t val)
+{
+ return ((val) << A4XX_SP_DS_OUT_REG_B_REGID__SHIFT) & A4XX_SP_DS_OUT_REG_B_REGID__MASK;
+}
+#define A4XX_SP_DS_OUT_REG_B_COMPMASK__MASK 0x1e000000
+#define A4XX_SP_DS_OUT_REG_B_COMPMASK__SHIFT 25
+static inline uint32_t A4XX_SP_DS_OUT_REG_B_COMPMASK(uint32_t val)
+{
+ return ((val) << A4XX_SP_DS_OUT_REG_B_COMPMASK__SHIFT) & A4XX_SP_DS_OUT_REG_B_COMPMASK__MASK;
+}
+
+static inline uint32_t REG_A4XX_SP_DS_VPC_DST(uint32_t i0) { return 0x0000232c + 0x1*i0; }
+
+static inline uint32_t REG_A4XX_SP_DS_VPC_DST_REG(uint32_t i0) { return 0x0000232c + 0x1*i0; }
+#define A4XX_SP_DS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff
+#define A4XX_SP_DS_VPC_DST_REG_OUTLOC0__SHIFT 0
+static inline uint32_t A4XX_SP_DS_VPC_DST_REG_OUTLOC0(uint32_t val)
+{
+ return ((val) << A4XX_SP_DS_VPC_DST_REG_OUTLOC0__SHIFT) & A4XX_SP_DS_VPC_DST_REG_OUTLOC0__MASK;
+}
+#define A4XX_SP_DS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00
+#define A4XX_SP_DS_VPC_DST_REG_OUTLOC1__SHIFT 8
+static inline uint32_t A4XX_SP_DS_VPC_DST_REG_OUTLOC1(uint32_t val)
+{
+ return ((val) << A4XX_SP_DS_VPC_DST_REG_OUTLOC1__SHIFT) & A4XX_SP_DS_VPC_DST_REG_OUTLOC1__MASK;
+}
+#define A4XX_SP_DS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000
+#define A4XX_SP_DS_VPC_DST_REG_OUTLOC2__SHIFT 16
+static inline uint32_t A4XX_SP_DS_VPC_DST_REG_OUTLOC2(uint32_t val)
+{
+ return ((val) << A4XX_SP_DS_VPC_DST_REG_OUTLOC2__SHIFT) & A4XX_SP_DS_VPC_DST_REG_OUTLOC2__MASK;
+}
+#define A4XX_SP_DS_VPC_DST_REG_OUTLOC3__MASK 0xff000000
+#define A4XX_SP_DS_VPC_DST_REG_OUTLOC3__SHIFT 24
+static inline uint32_t A4XX_SP_DS_VPC_DST_REG_OUTLOC3(uint32_t val)
+{
+ return ((val) << A4XX_SP_DS_VPC_DST_REG_OUTLOC3__SHIFT) & A4XX_SP_DS_VPC_DST_REG_OUTLOC3__MASK;
+}
+
#define REG_A4XX_SP_DS_OBJ_OFFSET_REG 0x00002334
#define A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000
#define A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16
@@ -1476,6 +1589,82 @@ static inline uint32_t A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val)
#define REG_A4XX_SP_DS_LENGTH_REG 0x00002339
+#define REG_A4XX_SP_GS_PARAM_REG 0x00002341
+#define A4XX_SP_GS_PARAM_REG_POSREGID__MASK 0x000000ff
+#define A4XX_SP_GS_PARAM_REG_POSREGID__SHIFT 0
+static inline uint32_t A4XX_SP_GS_PARAM_REG_POSREGID(uint32_t val)
+{
+ return ((val) << A4XX_SP_GS_PARAM_REG_POSREGID__SHIFT) & A4XX_SP_GS_PARAM_REG_POSREGID__MASK;
+}
+#define A4XX_SP_GS_PARAM_REG_PRIMREGID__MASK 0x0000ff00
+#define A4XX_SP_GS_PARAM_REG_PRIMREGID__SHIFT 8
+static inline uint32_t A4XX_SP_GS_PARAM_REG_PRIMREGID(uint32_t val)
+{
+ return ((val) << A4XX_SP_GS_PARAM_REG_PRIMREGID__SHIFT) & A4XX_SP_GS_PARAM_REG_PRIMREGID__MASK;
+}
+#define A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR__MASK 0xfff00000
+#define A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR__SHIFT 20
+static inline uint32_t A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR(uint32_t val)
+{
+ return ((val) << A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR__SHIFT) & A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR__MASK;
+}
+
+static inline uint32_t REG_A4XX_SP_GS_OUT(uint32_t i0) { return 0x00002342 + 0x1*i0; }
+
+static inline uint32_t REG_A4XX_SP_GS_OUT_REG(uint32_t i0) { return 0x00002342 + 0x1*i0; }
+#define A4XX_SP_GS_OUT_REG_A_REGID__MASK 0x000001ff
+#define A4XX_SP_GS_OUT_REG_A_REGID__SHIFT 0
+static inline uint32_t A4XX_SP_GS_OUT_REG_A_REGID(uint32_t val)
+{
+ return ((val) << A4XX_SP_GS_OUT_REG_A_REGID__SHIFT) & A4XX_SP_GS_OUT_REG_A_REGID__MASK;
+}
+#define A4XX_SP_GS_OUT_REG_A_COMPMASK__MASK 0x00001e00
+#define A4XX_SP_GS_OUT_REG_A_COMPMASK__SHIFT 9
+static inline uint32_t A4XX_SP_GS_OUT_REG_A_COMPMASK(uint32_t val)
+{
+ return ((val) << A4XX_SP_GS_OUT_REG_A_COMPMASK__SHIFT) & A4XX_SP_GS_OUT_REG_A_COMPMASK__MASK;
+}
+#define A4XX_SP_GS_OUT_REG_B_REGID__MASK 0x01ff0000
+#define A4XX_SP_GS_OUT_REG_B_REGID__SHIFT 16
+static inline uint32_t A4XX_SP_GS_OUT_REG_B_REGID(uint32_t val)
+{
+ return ((val) << A4XX_SP_GS_OUT_REG_B_REGID__SHIFT) & A4XX_SP_GS_OUT_REG_B_REGID__MASK;
+}
+#define A4XX_SP_GS_OUT_REG_B_COMPMASK__MASK 0x1e000000
+#define A4XX_SP_GS_OUT_REG_B_COMPMASK__SHIFT 25
+static inline uint32_t A4XX_SP_GS_OUT_REG_B_COMPMASK(uint32_t val)
+{
+ return ((val) << A4XX_SP_GS_OUT_REG_B_COMPMASK__SHIFT) & A4XX_SP_GS_OUT_REG_B_COMPMASK__MASK;
+}
+
+static inline uint32_t REG_A4XX_SP_GS_VPC_DST(uint32_t i0) { return 0x00002353 + 0x1*i0; }
+
+static inline uint32_t REG_A4XX_SP_GS_VPC_DST_REG(uint32_t i0) { return 0x00002353 + 0x1*i0; }
+#define A4XX_SP_GS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff
+#define A4XX_SP_GS_VPC_DST_REG_OUTLOC0__SHIFT 0
+static inline uint32_t A4XX_SP_GS_VPC_DST_REG_OUTLOC0(uint32_t val)
+{
+ return ((val) << A4XX_SP_GS_VPC_DST_REG_OUTLOC0__SHIFT) & A4XX_SP_GS_VPC_DST_REG_OUTLOC0__MASK;
+}
+#define A4XX_SP_GS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00
+#define A4XX_SP_GS_VPC_DST_REG_OUTLOC1__SHIFT 8
+static inline uint32_t A4XX_SP_GS_VPC_DST_REG_OUTLOC1(uint32_t val)
+{
+ return ((val) << A4XX_SP_GS_VPC_DST_REG_OUTLOC1__SHIFT) & A4XX_SP_GS_VPC_DST_REG_OUTLOC1__MASK;
+}
+#define A4XX_SP_GS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000
+#define A4XX_SP_GS_VPC_DST_REG_OUTLOC2__SHIFT 16
+static inline uint32_t A4XX_SP_GS_VPC_DST_REG_OUTLOC2(uint32_t val)
+{
+ return ((val) << A4XX_SP_GS_VPC_DST_REG_OUTLOC2__SHIFT) & A4XX_SP_GS_VPC_DST_REG_OUTLOC2__MASK;
+}
+#define A4XX_SP_GS_VPC_DST_REG_OUTLOC3__MASK 0xff000000
+#define A4XX_SP_GS_VPC_DST_REG_OUTLOC3__SHIFT 24
+static inline uint32_t A4XX_SP_GS_VPC_DST_REG_OUTLOC3(uint32_t val)
+{
+ return ((val) << A4XX_SP_GS_VPC_DST_REG_OUTLOC3__SHIFT) & A4XX_SP_GS_VPC_DST_REG_OUTLOC3__MASK;
+}
+
#define REG_A4XX_SP_GS_OBJ_OFFSET_REG 0x0000235b
#define A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000
#define A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16
@@ -1677,6 +1866,18 @@ static inline uint32_t A4XX_VFD_CONTROL_3_REGID_VTXCNT(uint32_t val)
{
return ((val) << A4XX_VFD_CONTROL_3_REGID_VTXCNT__SHIFT) & A4XX_VFD_CONTROL_3_REGID_VTXCNT__MASK;
}
+#define A4XX_VFD_CONTROL_3_REGID_TESSX__MASK 0x00ff0000
+#define A4XX_VFD_CONTROL_3_REGID_TESSX__SHIFT 16
+static inline uint32_t A4XX_VFD_CONTROL_3_REGID_TESSX(uint32_t val)
+{
+ return ((val) << A4XX_VFD_CONTROL_3_REGID_TESSX__SHIFT) & A4XX_VFD_CONTROL_3_REGID_TESSX__MASK;
+}
+#define A4XX_VFD_CONTROL_3_REGID_TESSY__MASK 0xff000000
+#define A4XX_VFD_CONTROL_3_REGID_TESSY__SHIFT 24
+static inline uint32_t A4XX_VFD_CONTROL_3_REGID_TESSY(uint32_t val)
+{
+ return ((val) << A4XX_VFD_CONTROL_3_REGID_TESSY__SHIFT) & A4XX_VFD_CONTROL_3_REGID_TESSY__MASK;
+}
#define REG_A4XX_VFD_CONTROL_4 0x00002204
@@ -1758,6 +1959,8 @@ static inline uint32_t A4XX_VFD_DECODE_INSTR_SHIFTCNT(uint32_t val)
#define REG_A4XX_TPL1_DEBUG_ECO_CONTROL 0x00000f00
+#define REG_A4XX_TPL1_TP_MODE_CONTROL 0x00000f03
+
#define REG_A4XX_TPL1_PERFCTR_TP_SEL_7 0x00000f0b
#define REG_A4XX_TPL1_TP_TEX_OFFSET 0x00002380
@@ -1800,6 +2003,10 @@ static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_GS(uint32_t val)
#define REG_A4XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR 0x000023a1
+#define REG_A4XX_TPL1_TP_CS_BORDER_COLOR_BASE_ADDR 0x000023a4
+
+#define REG_A4XX_TPL1_TP_CS_SAMPLER_BASE_ADDR 0x000023a5
+
#define REG_A4XX_TPL1_TP_CS_TEXMEMOBJ_BASE_ADDR 0x000023a6
#define REG_A4XX_GRAS_TSE_STATUS 0x00000c80
@@ -2078,6 +2285,8 @@ static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y(uint32_t val)
#define REG_A4XX_HLSQ_DEBUG_ECO_CONTROL 0x00000e04
+#define REG_A4XX_HLSQ_MODE_CONTROL 0x00000e05
+
#define REG_A4XX_HLSQ_PERF_PIPE_MASK 0x00000e0e
#define REG_A4XX_HLSQ_CONTROL_0_REG 0x000023c0
@@ -2158,6 +2367,8 @@ static inline uint32_t A4XX_HLSQ_CONTROL_3_REG_REGID(uint32_t val)
return ((val) << A4XX_HLSQ_CONTROL_3_REG_REGID__SHIFT) & A4XX_HLSQ_CONTROL_3_REG_REGID__MASK;
}
+#define REG_A4XX_HLSQ_CONTROL_4_REG 0x000023c4
+
#define REG_A4XX_HLSQ_VS_CONTROL_REG 0x000023c5
#define A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK 0x000000ff
#define A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT 0
@@ -2293,6 +2504,36 @@ static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH(uint32_t val)
return ((val) << A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH__MASK;
}
+#define REG_A4XX_HLSQ_CS_CONTROL 0x000023ca
+
+#define REG_A4XX_HLSQ_CL_NDRANGE_0 0x000023cd
+
+#define REG_A4XX_HLSQ_CL_NDRANGE_1 0x000023ce
+
+#define REG_A4XX_HLSQ_CL_NDRANGE_2 0x000023cf
+
+#define REG_A4XX_HLSQ_CL_NDRANGE_3 0x000023d0
+
+#define REG_A4XX_HLSQ_CL_NDRANGE_4 0x000023d1
+
+#define REG_A4XX_HLSQ_CL_NDRANGE_5 0x000023d2
+
+#define REG_A4XX_HLSQ_CL_NDRANGE_6 0x000023d3
+
+#define REG_A4XX_HLSQ_CL_CONTROL_0 0x000023d4
+
+#define REG_A4XX_HLSQ_CL_CONTROL_1 0x000023d5
+
+#define REG_A4XX_HLSQ_CL_KERNEL_CONST 0x000023d6
+
+#define REG_A4XX_HLSQ_CL_KERNEL_GROUP_X 0x000023d7
+
+#define REG_A4XX_HLSQ_CL_KERNEL_GROUP_Y 0x000023d8
+
+#define REG_A4XX_HLSQ_CL_KERNEL_GROUP_Z 0x000023d9
+
+#define REG_A4XX_HLSQ_CL_WG_OFFSET 0x000023da
+
#define REG_A4XX_HLSQ_UPDATE_CONTROL 0x000023db
#define REG_A4XX_PC_BINNING_COMMAND 0x00000d00
@@ -2389,16 +2630,10 @@ static inline uint32_t A4XX_PC_HS_PARAM_PRIMTYPE(enum adreno_pa_su_sc_draw val)
#define REG_A4XX_UNKNOWN_0D01 0x00000d01
-#define REG_A4XX_UNKNOWN_0E05 0x00000e05
-
#define REG_A4XX_UNKNOWN_0E42 0x00000e42
#define REG_A4XX_UNKNOWN_0EC2 0x00000ec2
-#define REG_A4XX_UNKNOWN_0EC3 0x00000ec3
-
-#define REG_A4XX_UNKNOWN_0F03 0x00000f03
-
#define REG_A4XX_UNKNOWN_2001 0x00002001
#define REG_A4XX_UNKNOWN_209B 0x0000209b
@@ -2439,6 +2674,8 @@ static inline uint32_t A4XX_UNKNOWN_20F7(float val)
#define REG_A4XX_UNKNOWN_22D7 0x000022d7
+#define REG_A4XX_UNKNOWN_2352 0x00002352
+
#define REG_A4XX_TEX_SAMP_0 0x00000000
#define A4XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR 0x00000001
#define A4XX_TEX_SAMP_0_XY_MAG__MASK 0x00000006
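
The generated pack helpers for address-like fields drop the field's low bits before shifting ((val >> 12) << 12, (val >> 5), and so on), which encodes the hardware alignment requirement directly in the helper. A small worked example, with an illustrative offset value:

```c
/* Illustrative only: RB_STENCIL_INFO stores a 4 KiB-aligned base,
 * so A4XX_RB_STENCIL_INFO_STENCIL_BASE() silently discards bits
 * [11:0] of whatever it is given.
 */
uint32_t stencil_base = 0x8000;      /* hypothetical gmem offset */
uint32_t rb_stencil_info =
	A4XX_RB_STENCIL_INFO_SEPARATE_STENCIL |
	A4XX_RB_STENCIL_INFO_STENCIL_BASE(stencil_base);
/* rb_stencil_info == 0x00008001; an unaligned base like 0x8010
 * would pack to the same value, the low bits being dropped. */
```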
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_blend.c b/src/gallium/drivers/freedreno/a4xx/fd4_blend.c
index 396caa532fc..d5e823ef69d 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_blend.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_blend.c
@@ -61,7 +61,7 @@ fd4_blend_state_create(struct pipe_context *pctx,
struct fd4_blend_stateobj *so;
// enum a3xx_rop_code rop = ROP_COPY;
bool reads_dest = false;
- int i;
+ unsigned i, mrt_blend = 0;
if (cso->logicop_enable) {
// rop = cso->logicop_func; /* maps 1:1 */
@@ -84,11 +84,6 @@ fd4_blend_state_create(struct pipe_context *pctx,
}
}
- if (cso->independent_blend_enable) {
- DBG("Unsupported! independent blend state");
- return NULL;
- }
-
so = CALLOC_STRUCT(fd4_blend_stateobj);
if (!so)
return NULL;
@@ -96,7 +91,12 @@ fd4_blend_state_create(struct pipe_context *pctx,
so->base = *cso;
for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) {
- const struct pipe_rt_blend_state *rt = &cso->rt[i];
+ const struct pipe_rt_blend_state *rt;
+
+ if (cso->independent_blend_enable)
+ rt = &cso->rt[i];
+ else
+ rt = &cso->rt[0];
so->rb_mrt[i].blend_control =
A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) |
@@ -115,7 +115,7 @@ fd4_blend_state_create(struct pipe_context *pctx,
A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE |
A4XX_RB_MRT_CONTROL_BLEND |
A4XX_RB_MRT_CONTROL_BLEND2;
- so->rb_fs_output |= A4XX_RB_FS_OUTPUT_ENABLE_BLEND(1);
+ mrt_blend |= (1 << i);
}
if (reads_dest)
@@ -125,5 +125,7 @@ fd4_blend_state_create(struct pipe_context *pctx,
so->rb_mrt[i].buf_info |= A4XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS);
}
+ so->rb_fs_output = A4XX_RB_FS_OUTPUT_ENABLE_BLEND(mrt_blend);
+
return so;
}
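
Dropping the independent-blend rejection works because rt[0] is simply replicated across all MRTs when independent_blend_enable is not set, and ENABLE_BLEND now takes a per-MRT bitmask rather than a single flag. A condensed sketch of the accumulation the hunks above perform (the blend_enable test is implied by the surrounding context):

```c
/* Condensed sketch: pick the per-RT state (or replicate rt[0]),
 * collect a bit per blending MRT, and write the mask out once.
 */
unsigned mrt_blend = 0;
for (unsigned i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
	const struct pipe_rt_blend_state *rt =
		cso->independent_blend_enable ? &cso->rt[i] : &cso->rt[0];
	if (rt->blend_enable)
		mrt_blend |= (1 << i);
}
so->rb_fs_output = A4XX_RB_FS_OUTPUT_ENABLE_BLEND(mrt_blend);
```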
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_blend.h b/src/gallium/drivers/freedreno/a4xx/fd4_blend.h
index 33641da5e2c..7620d00a625 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_blend.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_blend.h
@@ -32,17 +32,19 @@
#include "pipe/p_state.h"
#include "pipe/p_context.h"
+#include "freedreno_util.h"
+
struct fd4_blend_stateobj {
struct pipe_blend_state base;
struct {
uint32_t control;
uint32_t buf_info;
uint32_t blend_control;
- } rb_mrt[8];
+ } rb_mrt[A4XX_MAX_RENDER_TARGETS];
uint32_t rb_fs_output;
};
-static INLINE struct fd4_blend_stateobj *
+static inline struct fd4_blend_stateobj *
fd4_blend_stateobj(struct pipe_blend_state *blend)
{
return (struct fd4_blend_stateobj *)blend;
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_context.c b/src/gallium/drivers/freedreno/a4xx/fd4_context.c
index 2321876dd48..e172d350517 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_context.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.c
@@ -86,7 +86,7 @@ create_blit_texcoord_vertexbuf(struct pipe_context *pctx)
}
static const uint8_t primtypes[PIPE_PRIM_MAX] = {
- [PIPE_PRIM_POINTS] = DI_PT_POINTLIST_A3XX,
+ [PIPE_PRIM_POINTS] = DI_PT_POINTLIST,
[PIPE_PRIM_LINES] = DI_PT_LINELIST,
[PIPE_PRIM_LINE_STRIP] = DI_PT_LINESTRIP,
[PIPE_PRIM_LINE_LOOP] = DI_PT_LINELOOP,
@@ -119,6 +119,7 @@ fd4_context_create(struct pipe_screen *pscreen, void *priv)
fd4_gmem_init(pctx);
fd4_texture_init(pctx);
fd4_prog_init(pctx);
+ fd4_emit_init(pctx);
pctx = fd_context_init(&fd4_ctx->base, pscreen, primtypes, priv);
if (!pctx)
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_context.h b/src/gallium/drivers/freedreno/a4xx/fd4_context.h
index 53e1bf6a2e6..0b749916841 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_context.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.h
@@ -90,7 +90,7 @@ struct fd4_context {
struct ir3_shader_key last_key;
};
-static INLINE struct fd4_context *
+static inline struct fd4_context *
fd4_context(struct fd_context *ctx)
{
return (struct fd4_context *)ctx;
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
index de5a306af60..2bd2ca23d54 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
@@ -48,6 +48,9 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
{
const struct pipe_draw_info *info = emit->info;
+ if (!(fd4_emit_get_vp(emit) && fd4_emit_get_fp(emit)))
+ return;
+
fd4_emit_state(ctx, ring, emit);
if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE))
@@ -108,7 +111,6 @@ static void
fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
{
struct fd4_context *fd4_ctx = fd4_context(ctx);
- struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd4_emit emit = {
.vtx = &ctx->vtx,
.prog = &ctx->prog,
@@ -129,8 +131,9 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
.fsaturate_t = fd4_ctx->fsaturate_t,
.fsaturate_r = fd4_ctx->fsaturate_r,
},
- .format = fd4_emit_format(pfb->cbufs[0]),
- .pformat = pipe_surface_format(pfb->cbufs[0]),
+ .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
+ .sprite_coord_enable = ctx->rasterizer ? ctx->rasterizer->sprite_coord_enable : false,
+ .sprite_coord_mode = ctx->rasterizer ? ctx->rasterizer->sprite_coord_mode : false,
};
unsigned dirty;
@@ -170,20 +173,16 @@ fd4_clear(struct fd_context *ctx, unsigned buffers,
struct fd4_context *fd4_ctx = fd4_context(ctx);
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+ unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0};
unsigned dirty = ctx->dirty;
- unsigned ce, i;
+ unsigned i;
struct fd4_emit emit = {
.vtx = &fd4_ctx->solid_vbuf_state,
.prog = &ctx->solid_prog,
.key = {
- .half_precision = true,
+ .half_precision = fd_half_precision(pfb),
},
- .format = fd4_emit_format(pfb->cbufs[0]),
};
- uint32_t colr = 0;
-
- if ((buffers & PIPE_CLEAR_COLOR) && pfb->nr_cbufs)
- colr = pack_rgba(pfb->cbufs[0]->format, color->f);
dirty &= FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR;
dirty |= FD_DIRTY_PROG;
@@ -257,16 +256,15 @@ fd4_clear(struct fd_context *ctx, unsigned buffers,
if (buffers & PIPE_CLEAR_COLOR) {
OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
OUT_RING(ring, A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));
- ce = 0xf;
- } else {
- ce = 0x0;
}
- for (i = 0; i < 8; i++) {
+ for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
+ mrt_comp[i] = (buffers & (PIPE_CLEAR_COLOR0 << i)) ? 0xf : 0x0;
+
OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR |
A4XX_RB_MRT_CONTROL_B11 |
- A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(ce));
+ A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));
OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
OUT_RING(ring, A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
@@ -277,6 +275,16 @@ fd4_clear(struct fd_context *ctx, unsigned buffers,
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO));
}
+ OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
+ OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
+ A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
+ A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
+ A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
+ A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
+ A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
+ A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
+ A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
+
fd4_emit_vertex_bufs(ring, &emit);
OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
@@ -285,14 +293,8 @@ fd4_clear(struct fd_context *ctx, unsigned buffers,
OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1);
OUT_RING(ring, 0x00000000);
- OUT_PKT0(ring, REG_A4XX_RB_CLEAR_COLOR_DW0, 4);
- OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW0 */
- OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW1 */
- OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW2 */
- OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW3 */
-
/* until fastclear works: */
- fd4_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);
+ fd4_emit_const(ring, SHADER_FRAGMENT, 0, 0, 4, color->ui, NULL);
OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
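
The clear path now honors the per-buffer PIPE_CLEAR_COLORn bits: every MRT keeps COMPONENT_ENABLE(0xf) for the fastclear setup, and RB_RENDER_COMPONENTS gates which targets are actually written. A small worked example with illustrative inputs:

```c
/* Worked example: clearing only color buffers 0 and 2. */
unsigned buffers = PIPE_CLEAR_COLOR0 | PIPE_CLEAR_COLOR2;
unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS];
for (unsigned i = 0; i < A4XX_MAX_RENDER_TARGETS; i++)
	mrt_comp[i] = (buffers & (PIPE_CLEAR_COLOR0 << i)) ? 0xf : 0x0;
/* mrt_comp = { 0xf, 0x0, 0xf, 0x0, 0x0, 0x0, 0x0, 0x0 }, so only
 * RT0 and RT2 receive the clear color via RB_RENDER_COMPONENTS. */
```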
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.h b/src/gallium/drivers/freedreno/a4xx/fd4_draw.h
index 1bd376ca6ec..b89a30a7c4b 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.h
@@ -106,6 +106,7 @@ fd4_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
{
struct pipe_index_buffer *idx = &ctx->indexbuf;
struct fd_bo *idx_bo = NULL;
+ enum pc_di_primtype primtype = ctx->primtypes[info->mode];
enum a4xx_index_size idx_type;
enum pc_di_src_sel src_sel;
uint32_t idx_size, idx_offset;
@@ -126,7 +127,12 @@ fd4_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
src_sel = DI_SRC_SEL_AUTO_INDEX;
}
- fd4_draw(ctx, ring, ctx->primtypes[info->mode], vismode, src_sel,
+ /* points + psize -> spritelist: */
+ if (ctx->rasterizer && ctx->rasterizer->point_size_per_vertex &&
+ (info->mode == PIPE_PRIM_POINTS))
+ primtype = DI_PT_POINTLIST_PSIZE;
+
+ fd4_draw(ctx, ring, primtype, vismode, src_sel,
info->count, info->instance_count,
idx_type, idx_size, idx_offset, idx_bo);
}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index 4b6eb646aa7..b75be29e523 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -43,19 +43,26 @@
#include "fd4_format.h"
#include "fd4_zsa.h"
+static const enum adreno_state_block sb[] = {
+ [SHADER_VERTEX] = SB_VERT_SHADER,
+ [SHADER_FRAGMENT] = SB_FRAG_SHADER,
+};
+
/* regid: base const register
* prsc or dwords: buffer containing constant values
* sizedwords: size of const value buffer
*/
void
-fd4_emit_constant(struct fd_ringbuffer *ring,
- enum adreno_state_block sb,
+fd4_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
uint32_t regid, uint32_t offset, uint32_t sizedwords,
const uint32_t *dwords, struct pipe_resource *prsc)
{
uint32_t i, sz;
enum adreno_state_src src;
+ debug_assert((regid % 4) == 0);
+ debug_assert((sizedwords % 4) == 0);
+
if (prsc) {
sz = 0;
src = 0x2; // TODO ??
@@ -67,7 +74,7 @@ fd4_emit_constant(struct fd_ringbuffer *ring,
OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) |
CP_LOAD_STATE_0_STATE_SRC(src) |
- CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
CP_LOAD_STATE_0_NUM_UNIT(sizedwords/4));
if (prsc) {
struct fd_bo *bo = fd_resource(prsc)->bo;
@@ -84,89 +91,31 @@ fd4_emit_constant(struct fd_ringbuffer *ring,
}
static void
-emit_constants(struct fd_ringbuffer *ring,
- enum adreno_state_block sb,
- struct fd_constbuf_stateobj *constbuf,
- struct ir3_shader_variant *shader,
- bool emit_immediates)
+fd4_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write,
+ uint32_t regid, uint32_t num, struct fd_bo **bos, uint32_t *offsets)
{
- uint32_t enabled_mask = constbuf->enabled_mask;
- uint32_t max_const;
- int i;
-
- // XXX TODO only emit dirty consts.. but we need to keep track if
- // they are clobbered by a clear, gmem2mem, or mem2gmem..
- constbuf->dirty_mask = enabled_mask;
-
- /* in particular, with binning shader we may end up with unused
- * consts, ie. we could end up w/ constlen that is smaller
- * than first_immediate. In that case truncate the user consts
- * early to avoid HLSQ lockup caused by writing too many consts
- */
- max_const = MIN2(shader->first_driver_param, shader->constlen);
-
- /* emit user constants: */
- if (enabled_mask & 1) {
- const unsigned index = 0;
- struct pipe_constant_buffer *cb = &constbuf->cb[index];
- unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
-
- // I expect that size should be a multiple of vec4's:
- assert(size == align(size, 4));
-
- /* and even if the start of the const buffer is before
- * first_immediate, the end may not be:
- */
- size = MIN2(size, 4 * max_const);
-
- if (size && (constbuf->dirty_mask & (1 << index))) {
- fd4_emit_constant(ring, sb, 0,
- cb->buffer_offset, size,
- cb->user_buffer, cb->buffer);
- constbuf->dirty_mask &= ~(1 << index);
- }
-
- enabled_mask &= ~(1 << index);
- }
-
- /* emit ubos: */
- if (shader->constlen > shader->first_driver_param) {
- uint32_t params = MIN2(4, shader->constlen - shader->first_driver_param);
- OUT_PKT3(ring, CP_LOAD_STATE, 2 + params * 4);
- OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(shader->first_driver_param) |
- CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
- CP_LOAD_STATE_0_STATE_BLOCK(sb) |
- CP_LOAD_STATE_0_NUM_UNIT(params));
- OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
- CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
-
- for (i = 1; i <= params * 4; i++) {
- struct pipe_constant_buffer *cb = &constbuf->cb[i];
- assert(!cb->user_buffer);
- if ((enabled_mask & (1 << i)) && cb->buffer)
- OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset, 0, 0);
- else
- OUT_RING(ring, 0xbad00000 | ((i - 1) << 16));
- }
- }
-
- /* emit shader immediates: */
- if (shader && emit_immediates) {
- int size = shader->immediates_count;
- uint32_t base = shader->first_immediate;
-
- /* truncate size to avoid writing constants that shader
- * does not use:
- */
- size = MIN2(size + base, shader->constlen) - base;
+ uint32_t i;
- /* convert out of vec4: */
- base *= 4;
- size *= 4;
+ debug_assert((regid % 4) == 0);
+ debug_assert((num % 4) == 0);
- if (size > 0) {
- fd4_emit_constant(ring, sb, base,
- 0, size, shader->immediates[0].val, NULL);
+ OUT_PKT3(ring, CP_LOAD_STATE, 2 + num);
+ OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) |
+ CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+ CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
+ CP_LOAD_STATE_0_NUM_UNIT(num/4));
+ OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+ CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
+
+ for (i = 0; i < num; i++) {
+ if (bos[i]) {
+ if (write) {
+ OUT_RELOCW(ring, bos[i], offsets[i], 0, 0);
+ } else {
+ OUT_RELOC(ring, bos[i], offsets[i], 0, 0);
+ }
+ } else {
+ OUT_RING(ring, 0xbad00000 | (i << 16));
}
}
}
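
fd4_emit_const_bo() uploads a table of buffer addresses (e.g. UBO pointers) as consts in one CP_LOAD_STATE packet; NULL slots get a recognizable poison value instead of a relocation. A hypothetical call site (the bo/offset names are invented for illustration):

```c
/* Hypothetical usage: upload two UBO base addresses starting at a
 * vec4-aligned const register; 'num' must be a multiple of 4, so
 * the two unused slots are padded with NULL and emitted as the
 * 0xbad00000 poison pattern, easy to spot in a GPU hang dump.
 */
struct fd_bo *bos[4]     = { ubo0_bo, ubo1_bo, NULL, NULL };
uint32_t      offsets[4] = { 0, 256, 0, 0 };
fd4_emit_const_bo(ring, SHADER_VERTEX, false /* read-only */,
		regid, 4, bos, offsets);
```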
@@ -223,15 +172,19 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
const struct fd4_pipe_sampler_view *view = tex->textures[i] ?
fd4_pipe_sampler_view(tex->textures[i]) :
&dummy_view;
- struct fd_resource *rsc = fd_resource(view->base.texture);
- unsigned start = view->base.u.tex.first_level;
- uint32_t offset = fd_resource_offset(rsc, start, 0);
+ unsigned start = fd_sampler_first_level(&view->base);
OUT_RING(ring, view->texconst0);
OUT_RING(ring, view->texconst1);
OUT_RING(ring, view->texconst2);
OUT_RING(ring, view->texconst3);
- OUT_RELOC(ring, rsc->bo, offset, view->textconst4, 0);
+ if (view->base.texture) {
+ struct fd_resource *rsc = fd_resource(view->base.texture);
+ uint32_t offset = fd_resource_offset(rsc, start, 0);
+ OUT_RELOC(ring, rsc->bo, offset, view->textconst4, 0);
+ } else {
+ OUT_RING(ring, 0x00000000);
+ }
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
@@ -244,51 +197,110 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
* special cases..
*/
void
-fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf)
+fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, unsigned nr_bufs,
+ struct pipe_surface **bufs)
{
- struct fd_resource *rsc = fd_resource(psurf->texture);
- unsigned lvl = psurf->u.tex.level;
- struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl);
- uint32_t offset = fd_resource_offset(rsc, lvl, psurf->u.tex.first_layer);
- enum pipe_format format = fd4_gmem_restore_format(psurf->format);
+ unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS];
+ int i;
- debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
+ for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
+ mrt_comp[i] = (i < nr_bufs) ? 0xf : 0;
+ }
/* output sampler state: */
- OUT_PKT3(ring, CP_LOAD_STATE, 4);
+ OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * nr_bufs));
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
- CP_LOAD_STATE_0_NUM_UNIT(1));
+ CP_LOAD_STATE_0_NUM_UNIT(nr_bufs));
OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
- OUT_RING(ring, A4XX_TEX_SAMP_0_XY_MAG(A4XX_TEX_NEAREST) |
- A4XX_TEX_SAMP_0_XY_MIN(A4XX_TEX_NEAREST) |
- A4XX_TEX_SAMP_0_WRAP_S(A4XX_TEX_CLAMP_TO_EDGE) |
- A4XX_TEX_SAMP_0_WRAP_T(A4XX_TEX_CLAMP_TO_EDGE) |
- A4XX_TEX_SAMP_0_WRAP_R(A4XX_TEX_REPEAT));
- OUT_RING(ring, 0x00000000);
+ for (i = 0; i < nr_bufs; i++) {
+ OUT_RING(ring, A4XX_TEX_SAMP_0_XY_MAG(A4XX_TEX_NEAREST) |
+ A4XX_TEX_SAMP_0_XY_MIN(A4XX_TEX_NEAREST) |
+ A4XX_TEX_SAMP_0_WRAP_S(A4XX_TEX_CLAMP_TO_EDGE) |
+ A4XX_TEX_SAMP_0_WRAP_T(A4XX_TEX_CLAMP_TO_EDGE) |
+ A4XX_TEX_SAMP_0_WRAP_R(A4XX_TEX_REPEAT));
+ OUT_RING(ring, 0x00000000);
+ }
/* emit texture state: */
- OUT_PKT3(ring, CP_LOAD_STATE, 10);
+ OUT_PKT3(ring, CP_LOAD_STATE, 2 + (8 * nr_bufs));
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
- CP_LOAD_STATE_0_NUM_UNIT(1));
+ CP_LOAD_STATE_0_NUM_UNIT(nr_bufs));
OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
- OUT_RING(ring, A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(format)) |
- A4XX_TEX_CONST_0_TYPE(A4XX_TEX_2D) |
- fd4_tex_swiz(format, PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN,
- PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA));
- OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(psurf->width) |
- A4XX_TEX_CONST_1_HEIGHT(psurf->height));
- OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp));
- OUT_RING(ring, 0x00000000);
- OUT_RELOC(ring, rsc->bo, offset, 0, 0);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
+ for (i = 0; i < nr_bufs; i++) {
+ if (bufs[i]) {
+ struct fd_resource *rsc = fd_resource(bufs[i]->texture);
+ /* note: PIPE_BUFFER disallowed for surfaces */
+ unsigned lvl = bufs[i]->u.tex.level;
+ struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl);
+ uint32_t offset = fd_resource_offset(rsc, lvl, bufs[i]->u.tex.first_layer);
+ enum pipe_format format = fd4_gmem_restore_format(bufs[i]->format);
+
+ /* The restore blit_zs shader expects stencil in sampler 0,
+ * and depth in sampler 1
+ */
+ if (rsc->stencil && (i == 0)) {
+ rsc = rsc->stencil;
+ format = fd4_gmem_restore_format(rsc->base.b.format);
+ }
+
+ /* z32 restore is accomplished using depth write. If there is
+	 * no stencil component (ie. PIPE_FORMAT_Z32_FLOAT)
+ * then no render target:
+ *
+ * (The same applies for z32_s8x24, since for stencil sampler
+ * state the above 'if' will replace 'format' with s8)
+ */
+ if ((format == PIPE_FORMAT_Z32_FLOAT) ||
+ (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT))
+ mrt_comp[i] = 0;
+
+ debug_assert(bufs[i]->u.tex.first_layer == bufs[i]->u.tex.last_layer);
+
+ OUT_RING(ring, A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(format)) |
+ A4XX_TEX_CONST_0_TYPE(A4XX_TEX_2D) |
+ fd4_tex_swiz(format, PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN,
+ PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA));
+ OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(bufs[i]->width) |
+ A4XX_TEX_CONST_1_HEIGHT(bufs[i]->height));
+ OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp));
+ OUT_RING(ring, 0x00000000);
+ OUT_RELOC(ring, rsc->bo, offset, 0, 0);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ } else {
+ OUT_RING(ring, A4XX_TEX_CONST_0_FMT(0) |
+ A4XX_TEX_CONST_0_TYPE(A4XX_TEX_2D) |
+ A4XX_TEX_CONST_0_SWIZ_X(A4XX_TEX_ONE) |
+ A4XX_TEX_CONST_0_SWIZ_Y(A4XX_TEX_ONE) |
+ A4XX_TEX_CONST_0_SWIZ_Z(A4XX_TEX_ONE) |
+ A4XX_TEX_CONST_0_SWIZ_W(A4XX_TEX_ONE));
+ OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(0) |
+ A4XX_TEX_CONST_1_HEIGHT(0));
+ OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(0));
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ }
+ }
+
+ OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
+ OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
+ A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
+ A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
+ A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
+ A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
+ A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
+ A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
+ A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
}
void
@@ -298,7 +310,9 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
uint32_t total_in = 0;
const struct fd_vertex_state *vtx = emit->vtx;
struct ir3_shader_variant *vp = fd4_emit_get_vp(emit);
- unsigned vertex_regid = regid(63, 0), instance_regid = regid(63, 0);
+ unsigned vertex_regid = regid(63, 0);
+ unsigned instance_regid = regid(63, 0);
+ unsigned vtxcnt_regid = regid(63, 0);
for (i = 0; i < vp->inputs_count; i++) {
uint8_t semantic = sem2name(vp->inputs[i].semantic);
@@ -306,6 +320,8 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
vertex_regid = vp->inputs[i].regid;
else if (semantic == TGSI_SEMANTIC_INSTANCEID)
instance_regid = vp->inputs[i].regid;
+ else if (semantic == IR3_SEMANTIC_VTXCNT)
+ vtxcnt_regid = vp->inputs[i].regid;
else if ((i < vtx->vtx->num_elements) && vp->inputs[i].compmask)
last = i;
}
@@ -313,7 +329,8 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
/* hw doesn't like to be configured for zero vbo's, it seems: */
if ((vtx->vtx->num_elements == 0) &&
(vertex_regid == regid(63, 0)) &&
- (instance_regid == regid(63, 0)))
+ (instance_regid == regid(63, 0)) &&
+ (vtxcnt_regid == regid(63, 0)))
return;
for (i = 0, j = 0; i <= last; i++) {
@@ -327,7 +344,8 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
enum a4xx_vtx_fmt fmt = fd4_pipe2vtx(pfmt);
bool switchnext = (i != last) ||
(vertex_regid != regid(63, 0)) ||
- (instance_regid != regid(63, 0));
+ (instance_regid != regid(63, 0)) ||
+ (vtxcnt_regid != regid(63, 0));
bool isint = util_format_is_pure_integer(pfmt);
uint32_t fs = util_format_get_blocksize(pfmt);
uint32_t off = vb->buffer_offset + elem->src_offset;
@@ -368,7 +386,7 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
A4XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
A4XX_VFD_CONTROL_1_REGID4INST(instance_regid));
OUT_RING(ring, 0x00000000); /* XXX VFD_CONTROL_2 */
- OUT_RING(ring, A4XX_VFD_CONTROL_3_REGID_VTXCNT(regid(63, 0)));
+ OUT_RING(ring, A4XX_VFD_CONTROL_3_REGID_VTXCNT(vtxcnt_regid));
OUT_RING(ring, 0x00000000); /* XXX VFD_CONTROL_4 */
/* cache invalidate, otherwise vertex fetch could see
@@ -389,6 +407,25 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
emit_marker(ring, 5);
+ if ((dirty & FD_DIRTY_FRAMEBUFFER) && !emit->key.binning_pass) {
+ struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+ unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0};
+
+ for (unsigned i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
+ mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0;
+ }
+
+ OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
+ OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
+ A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
+ A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
+ A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
+ A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
+ A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
+ A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
+ A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
+ }
+
if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) && !emit->key.binning_pass) {
uint32_t val = fd4_zsa_stateobj(ctx->zsa)->rb_render_control;
@@ -513,43 +550,24 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2]));
}
- if (dirty & FD_DIRTY_PROG)
- fd4_program_emit(ring, emit);
-
- if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) &&
- /* evil hack to deal sanely with clear path: */
- (emit->prog == &ctx->prog)) {
- fd_wfi(ctx, ring);
- emit_constants(ring, SB_VERT_SHADER,
- &ctx->constbuf[PIPE_SHADER_VERTEX],
- vp, emit->prog->dirty & FD_SHADER_DIRTY_VP);
- if (!emit->key.binning_pass) {
- emit_constants(ring, SB_FRAG_SHADER,
- &ctx->constbuf[PIPE_SHADER_FRAGMENT],
- fp, emit->prog->dirty & FD_SHADER_DIRTY_FP);
- }
+ if (dirty & FD_DIRTY_PROG) {
+ struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+ fd4_program_emit(ring, emit, pfb->nr_cbufs, pfb->cbufs);
}
- /* emit driver params every time */
- if (emit->info && emit->prog == &ctx->prog) {
- uint32_t vertex_params[4] = {
- emit->info->indexed ? emit->info->index_bias : emit->info->start,
- 0,
- 0,
- 0
- };
- if (vp->constlen >= vp->first_driver_param + 4) {
- fd4_emit_constant(ring, SB_VERT_SHADER,
- (vp->first_driver_param + 4) * 4,
- 0, 4, vertex_params, NULL);
- }
+ if (emit->prog == &ctx->prog) { /* evil hack to deal sanely with clear path */
+ ir3_emit_consts(vp, ring, emit->info, dirty);
+ if (!emit->key.binning_pass)
+ ir3_emit_consts(fp, ring, emit->info, dirty);
+ /* mark clean after emitting consts: */
+ ctx->prog.dirty = 0;
}
if ((dirty & FD_DIRTY_BLEND) && ctx->blend) {
struct fd4_blend_stateobj *blend = fd4_blend_stateobj(ctx->blend);
uint32_t i;
- for (i = 0; i < 8; i++) {
+ for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
OUT_RING(ring, blend->rb_mrt[i].control);
@@ -607,10 +625,10 @@ fd4_emit_restore(struct fd_context *ctx)
OUT_PKT0(ring, REG_A4XX_GRAS_DEBUG_ECO_CONTROL, 1);
OUT_RING(ring, 0x00000000);
- OUT_PKT0(ring, REG_A4XX_UNKNOWN_0EC3, 1);
+ OUT_PKT0(ring, REG_A4XX_SP_MODE_CONTROL, 1);
OUT_RING(ring, 0x00000006);
- OUT_PKT0(ring, REG_A4XX_UNKNOWN_0F03, 1);
+ OUT_PKT0(ring, REG_A4XX_TPL1_TP_MODE_CONTROL, 1);
OUT_RING(ring, 0x0000003a);
OUT_PKT0(ring, REG_A4XX_UNKNOWN_0D01, 1);
@@ -629,7 +647,7 @@ fd4_emit_restore(struct fd_context *ctx)
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000012);
- OUT_PKT0(ring, REG_A4XX_UNKNOWN_0E05, 1);
+ OUT_PKT0(ring, REG_A4XX_HLSQ_MODE_CONTROL, 1);
OUT_RING(ring, 0x00000000);
OUT_PKT0(ring, REG_A4XX_UNKNOWN_0CC5, 1);
@@ -752,9 +770,6 @@ fd4_emit_restore(struct fd_context *ctx)
OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1);
OUT_RING(ring, A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff));
- OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
- OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(0xf));
-
OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1);
OUT_RING(ring, A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR);
@@ -763,3 +778,11 @@ fd4_emit_restore(struct fd_context *ctx)
ctx->needs_rb_fbd = true;
}
+
+void
+fd4_emit_init(struct pipe_context *pctx)
+{
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->emit_const = fd4_emit_const;
+ ctx->emit_const_bo = fd4_emit_const_bo;
+}
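
fd4_emit_init() is the new indirection point: the generic context calls through ctx->emit_const / ctx->emit_const_bo so that shared code (and ir3_emit_consts()) can emit constants without knowing a4xx packet details. A sketch of the assumed core-side call shape, not the actual shared-code implementation:

```c
/* Sketch (assumed core-side usage): push a user constant buffer
 * through the generation-specific hook installed above.  Sizes are
 * rounded to whole vec4s to satisfy fd4_emit_const()'s asserts.
 */
static void
emit_user_consts(struct fd_context *ctx, struct fd_ringbuffer *ring,
		uint32_t regid, struct pipe_constant_buffer *cb)
{
	uint32_t sizedwords = align(cb->buffer_size, 16) / 4;
	ctx->emit_const(ring, SHADER_VERTEX, regid, cb->buffer_offset,
			sizedwords, cb->user_buffer, cb->buffer);
}
```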
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h
index 7d059f8e532..ab7850e50b0 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h
@@ -37,15 +37,13 @@
#include "ir3_shader.h"
struct fd_ringbuffer;
-enum adreno_state_block;
-void fd4_emit_constant(struct fd_ringbuffer *ring,
- enum adreno_state_block sb,
+void fd4_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
uint32_t regid, uint32_t offset, uint32_t sizedwords,
const uint32_t *dwords, struct pipe_resource *prsc);
void fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring,
- struct pipe_surface *psurf);
+ unsigned nr_bufs, struct pipe_surface **bufs);
/* grouped together emit-state for prog/vertex/state emit: */
struct fd4_emit {
@@ -53,10 +51,12 @@ struct fd4_emit {
const struct fd_program_stateobj *prog;
const struct pipe_draw_info *info;
struct ir3_shader_key key;
- enum a4xx_color_fmt format;
- enum pipe_format pformat;
uint32_t dirty;
+ uint32_t sprite_coord_enable; /* bitmask */
+ bool sprite_coord_mode;
+ bool rasterflat;
+
/* cached to avoid repeated lookups of same variants: */
struct ir3_shader_variant *vp, *fp;
/* TODO: other shader stages.. */
@@ -96,4 +96,6 @@ void fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
void fd4_emit_restore(struct fd_context *ctx);
+void fd4_emit_init(struct pipe_context *pctx);
+
#endif /* FD4_EMIT_H */
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_format.c b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
index 29abe0b0cc3..3e0045449eb 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_format.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
@@ -89,6 +89,14 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
_T(L8_UNORM, 8_UNORM, R8_UNORM, WZYX),
_T(I8_UNORM, 8_UNORM, NONE, WZYX),
+ /* NOTE: should be TFMT_8_UINT (which then gets remapped to
+ * TFMT_8_UNORM for mem2gmem in _gmem_restore_format()), but
+ * we don't know TFMT_8_UINT yet.. so just use TFMT_8_UNORM
+ * for now.. sampling from stencil as a texture might not
+ * work right, but at least should be fine for zsbuf..
+ */
+ _T(S8_UINT, 8_UNORM, R8_UNORM, WZYX),
+
/* 16-bit */
V_(R16_UNORM, 16_UNORM, NONE, WZYX),
V_(R16_SNORM, 16_SNORM, NONE, WZYX),
@@ -96,7 +104,7 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
VT(R16_SINT, 16_SINT, R16_SINT, WZYX),
V_(R16_USCALED, 16_UINT, NONE, WZYX),
V_(R16_SSCALED, 16_UINT, NONE, WZYX),
- VT(R16_FLOAT, 16_FLOAT, NONE, WZYX),
+ VT(R16_FLOAT, 16_FLOAT, R16_FLOAT,WZYX),
_T(A16_UINT, 16_UINT, NONE, WZYX),
_T(A16_SINT, 16_SINT, NONE, WZYX),
@@ -132,7 +140,7 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
VT(R32_SINT, 32_SINT, R32_SINT, WZYX),
V_(R32_USCALED, 32_UINT, NONE, WZYX),
V_(R32_SSCALED, 32_UINT, NONE, WZYX),
- VT(R32_FLOAT, 32_FLOAT, NONE, WZYX),
+ VT(R32_FLOAT, 32_FLOAT, R32_FLOAT,WZYX),
V_(R32_FIXED, 32_FIXED, NONE, WZYX),
_T(A32_UINT, 32_UINT, NONE, WZYX),
@@ -148,7 +156,7 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
VT(R16G16_SINT, 16_16_SINT, R16G16_SINT, WZYX),
V_(R16G16_USCALED, 16_16_UINT, NONE, WZYX),
V_(R16G16_SSCALED, 16_16_SINT, NONE, WZYX),
- VT(R16G16_FLOAT, 16_16_FLOAT, NONE, WZYX),
+ VT(R16G16_FLOAT, 16_16_FLOAT, R16G16_FLOAT,WZYX),
_T(L16A16_UINT, 16_16_UINT, NONE, WZYX),
_T(L16A16_SINT, 16_16_SINT, NONE, WZYX),
@@ -191,7 +199,8 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
_T(Z24X8_UNORM, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX),
_T(Z24_UNORM_S8_UINT, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX),
- /*_T(Z32_FLOAT, Z32_FLOAT, R8G8B8A8_UNORM, WZYX),*/
+ _T(Z32_FLOAT, 32_FLOAT, R8G8B8A8_UNORM, WZYX),
+ _T(Z32_FLOAT_S8X24_UINT, 32_FLOAT,R8G8B8A8_UNORM, WZYX),
/* 48-bit */
V_(R16G16B16_UNORM, 16_16_16_UNORM, NONE, WZYX),
@@ -218,7 +227,7 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
VT(R32G32_SINT, 32_32_SINT, R32G32_SINT, WZYX),
V_(R32G32_USCALED, 32_32_UINT, NONE, WZYX),
V_(R32G32_SSCALED, 32_32_SINT, NONE, WZYX),
- VT(R32G32_FLOAT, 32_32_FLOAT, NONE, WZYX),
+ VT(R32G32_FLOAT, 32_32_FLOAT, R32G32_FLOAT,WZYX),
V_(R32G32_FIXED, 32_32_FIXED, NONE, WZYX),
_T(L32A32_UINT, 32_32_UINT, NONE, WZYX),
@@ -282,6 +291,9 @@ fd4_pipe2swap(enum pipe_format format)
enum a4xx_tex_fetchsize
fd4_pipe2fetchsize(enum pipe_format format)
{
+ if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
+ format = PIPE_FORMAT_Z32_FLOAT;
+
switch (util_format_get_blocksizebits(format)) {
case 8: return TFETCH4_1_BYTE;
case 16: return TFETCH4_2_BYTE;
@@ -312,6 +324,8 @@ fd4_gmem_restore_format(enum pipe_format format)
return PIPE_FORMAT_R8G8B8A8_UNORM;
case PIPE_FORMAT_Z16_UNORM:
return PIPE_FORMAT_R8G8_UNORM;
+ case PIPE_FORMAT_S8_UINT:
+ return PIPE_FORMAT_R8_UNORM;
default:
return format;
}
@@ -328,6 +342,9 @@ fd4_pipe2depth(enum pipe_format format)
case PIPE_FORMAT_X8Z24_UNORM:
case PIPE_FORMAT_S8_UINT_Z24_UNORM:
return DEPTH4_24_8;
+ case PIPE_FORMAT_Z32_FLOAT:
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ return DEPTH4_32;
default:
return ~0;
}
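
The fetchsize remap matters because PIPE_FORMAT_Z32_FLOAT_S8X24_UINT is a 64-bit format, but on a4xx the depth and stencil planes are stored separately, so texture fetch operates on the 32-bit depth plane. A worked example of the effect:

```c
/* Worked example: without the remap, blocksizebits() == 64 for
 * Z32F_S8X24 and the switch would hit the default case; with it,
 * both depth formats fetch 4 bytes per texel.
 */
assert(fd4_pipe2fetchsize(PIPE_FORMAT_Z32_FLOAT) == TFETCH4_4_BYTE);
assert(fd4_pipe2fetchsize(PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) ==
		TFETCH4_4_BYTE);
```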
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
index 9a905062071..81c37f72565 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
@@ -44,12 +44,6 @@
#include "fd4_format.h"
#include "fd4_zsa.h"
-static const struct ir3_shader_key key = {
- // XXX should set this based on render target format! We don't
- // want half_precision if float32 render target!!!
- .half_precision = true,
-};
-
static void
emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
struct pipe_surface **bufs, uint32_t *bases, uint32_t bin_w)
@@ -63,7 +57,7 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
tile_mode = TILE4_LINEAR;
}
- for (i = 0; i < 8; i++) {
+ for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
enum a4xx_color_fmt format = 0;
enum a3xx_color_swap swap = WZYX;
struct fd_resource *rsc = NULL;
@@ -74,11 +68,23 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
if ((i < nr_bufs) && bufs[i]) {
struct pipe_surface *psurf = bufs[i];
+ enum pipe_format pformat = 0;
rsc = fd_resource(psurf->texture);
+ pformat = psurf->format;
+
+ /* In case we're drawing to Z32F_S8, the "color" actually goes to
+ * the stencil
+ */
+ if (rsc->stencil) {
+ rsc = rsc->stencil;
+ pformat = rsc->base.b.format;
+ bases++;
+ }
+
slice = fd_resource_slice(rsc, psurf->u.tex.level);
- format = fd4_pipe2color(psurf->format);
- swap = fd4_pipe2swap(psurf->format);
+ format = fd4_pipe2color(pformat);
+ swap = fd4_pipe2swap(pformat);
debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
@@ -94,6 +100,8 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
} else {
stride = slice->pitch * rsc->cpp;
}
+ } else if ((i < nr_bufs) && bases) {
+ base = bases[i];
}
OUT_PKT0(ring, REG_A4XX_RB_MRT_BUF_INFO(i), 3);
@@ -101,7 +109,7 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) |
A4XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap));
- if (bin_w || (i >= nr_bufs)) {
+ if (bin_w || (i >= nr_bufs) || !bufs[i]) {
OUT_RING(ring, base);
OUT_RING(ring, A4XX_RB_MRT_CONTROL3_STRIDE(stride));
} else {
@@ -115,30 +123,26 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
}
}
-static uint32_t
-depth_base(struct fd_context *ctx)
-{
- struct fd_gmem_stateobj *gmem = &ctx->gmem;
- struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
- uint32_t cpp = 4;
- if (pfb->cbufs[0]) {
- struct fd_resource *rsc =
- fd_resource(pfb->cbufs[0]->texture);
- cpp = rsc->cpp;
- }
- return align(gmem->bin_w * gmem->bin_h * cpp, 0x4000);
-}
-
/* transfer from gmem to system memory (ie. normal RAM) */
static void
-emit_gmem2mem_surf(struct fd_context *ctx,
+emit_gmem2mem_surf(struct fd_context *ctx, bool stencil,
uint32_t base, struct pipe_surface *psurf)
{
struct fd_ringbuffer *ring = ctx->ring;
struct fd_resource *rsc = fd_resource(psurf->texture);
- struct fd_resource_slice *slice = &rsc->slices[psurf->u.tex.level];
- uint32_t offset = fd_resource_offset(rsc, psurf->u.tex.level,
+ enum pipe_format pformat = psurf->format;
+ struct fd_resource_slice *slice;
+ uint32_t offset;
+
+ if (stencil) {
+ debug_assert(rsc->stencil);
+ rsc = rsc->stencil;
+ pformat = rsc->base.b.format;
+ }
+
+ slice = &rsc->slices[psurf->u.tex.level];
+ offset = fd_resource_offset(rsc, psurf->u.tex.level,
psurf->u.tex.first_layer);
debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
@@ -150,10 +154,10 @@ emit_gmem2mem_surf(struct fd_context *ctx,
OUT_RELOCW(ring, rsc->bo, offset, 0, 0); /* RB_COPY_DEST_BASE */
OUT_RING(ring, A4XX_RB_COPY_DEST_PITCH_PITCH(slice->pitch * rsc->cpp));
OUT_RING(ring, A4XX_RB_COPY_DEST_INFO_TILE(TILE4_LINEAR) |
- A4XX_RB_COPY_DEST_INFO_FORMAT(fd4_pipe2color(psurf->format)) |
+ A4XX_RB_COPY_DEST_INFO_FORMAT(fd4_pipe2color(pformat)) |
A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |
A4XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) |
- A4XX_RB_COPY_DEST_INFO_SWAP(fd4_pipe2swap(psurf->format)));
+ A4XX_RB_COPY_DEST_INFO_SWAP(fd4_pipe2swap(pformat)));
fd4_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
DI_SRC_SEL_AUTO_INDEX, 2, 1, INDEX_SIZE_IGN, 0, 0, NULL);
@@ -163,13 +167,15 @@ static void
fd4_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
{
struct fd4_context *fd4_ctx = fd4_context(ctx);
+ struct fd_gmem_stateobj *gmem = &ctx->gmem;
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd4_emit emit = {
.vtx = &fd4_ctx->solid_vbuf_state,
.prog = &ctx->solid_prog,
- .key = key,
- .format = fd4_emit_format(pfb->cbufs[0]),
+ .key = {
+ .half_precision = true,
+ },
};
OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
@@ -238,16 +244,26 @@ fd4_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
OUT_RING(ring, 0); /* ??? UNKNOWN_2209 */
- fd4_program_emit(ring, &emit);
+ fd4_program_emit(ring, &emit, 0, NULL);
fd4_emit_vertex_bufs(ring, &emit);
if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
- uint32_t base = depth_base(ctx);
- emit_gmem2mem_surf(ctx, base, pfb->zsbuf);
+ struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
+ if (!rsc->stencil || (ctx->resolve & FD_BUFFER_DEPTH))
+ emit_gmem2mem_surf(ctx, false, ctx->gmem.zsbuf_base[0], pfb->zsbuf);
+ if (rsc->stencil && (ctx->resolve & FD_BUFFER_STENCIL))
+ emit_gmem2mem_surf(ctx, true, ctx->gmem.zsbuf_base[1], pfb->zsbuf);
}
if (ctx->resolve & FD_BUFFER_COLOR) {
- emit_gmem2mem_surf(ctx, 0, pfb->cbufs[0]);
+ unsigned i;
+ for (i = 0; i < pfb->nr_cbufs; i++) {
+ if (!pfb->cbufs[i])
+ continue;
+ if (!(ctx->resolve & (PIPE_CLEAR_COLOR0 << i)))
+ continue;
+ emit_gmem2mem_surf(ctx, false, gmem->cbuf_base[i], pfb->cbufs[i]);
+ }
}
OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
@@ -260,14 +276,25 @@ fd4_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
/* transfer from system memory to gmem */
static void
-emit_mem2gmem_surf(struct fd_context *ctx, uint32_t base,
- struct pipe_surface *psurf, uint32_t bin_w)
+emit_mem2gmem_surf(struct fd_context *ctx, uint32_t *bases,
+ struct pipe_surface **bufs, uint32_t nr_bufs, uint32_t bin_w)
{
struct fd_ringbuffer *ring = ctx->ring;
+ struct pipe_surface *zsbufs[2];
+
+ emit_mrt(ring, nr_bufs, bufs, bases, bin_w);
+
+ if (bufs[0] && (bufs[0]->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) {
+ /* The gmem_restore_tex logic will put the first buffer's stencil
+ * as color. Supply it with the proper information to make that
+ * happen.
+ */
+ zsbufs[0] = zsbufs[1] = bufs[0];
+ bufs = zsbufs;
+ nr_bufs = 2;
+ }
- emit_mrt(ring, 1, &psurf, &base, bin_w);
-
- fd4_emit_gmem_restore_tex(ring, psurf);
+ fd4_emit_gmem_restore_tex(ring, nr_bufs, bufs);
fd4_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
DI_SRC_SEL_AUTO_INDEX, 2, 1, INDEX_SIZE_IGN, 0, 0, NULL);
@@ -282,10 +309,14 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd4_emit emit = {
.vtx = &fd4_ctx->blit_vbuf_state,
+ .sprite_coord_enable = 1,
+ /* NOTE: They all use the same VP, this is for vtx bufs. */
.prog = &ctx->blit_prog[0],
- .key = key,
- .format = fd4_emit_format(pfb->cbufs[0]),
+ .key = {
+ .half_precision = fd_half_precision(pfb),
+ },
};
+ unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0};
float x0, y0, x1, y1;
unsigned bin_w = tile->bin_w;
unsigned bin_h = tile->bin_h;
@@ -304,7 +335,9 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
OUT_RING(ring, fui(x1));
OUT_RING(ring, fui(y1));
- for (i = 0; i < 8; i++) {
+ for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
+ mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0;
+
OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR |
A4XX_RB_MRT_CONTROL_B11 |
@@ -319,6 +352,16 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO));
}
+ OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
+ OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
+ A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
+ A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
+ A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
+ A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
+ A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
+ A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
+ A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
+
OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
OUT_RING(ring, 0x8); /* XXX RB_RENDER_CONTROL */
@@ -381,7 +424,6 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
OUT_RING(ring, 0); /* ??? UNKNOWN_2209 */
- fd4_program_emit(ring, &emit);
fd4_emit_vertex_bufs(ring, &emit);
/* for gmem pitch/base calculations, we need to use the non-
@@ -390,11 +432,46 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
bin_w = gmem->bin_w;
bin_h = gmem->bin_h;
- if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
- emit_mem2gmem_surf(ctx, depth_base(ctx), pfb->zsbuf, bin_w);
+ if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR)) {
+ emit.prog = &ctx->blit_prog[pfb->nr_cbufs - 1];
+ emit.fp = NULL; /* frag shader changed so clear cache */
+ fd4_program_emit(ring, &emit, pfb->nr_cbufs, pfb->cbufs);
+ emit_mem2gmem_surf(ctx, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs, bin_w);
+ }
- if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR))
- emit_mem2gmem_surf(ctx, 0, pfb->cbufs[0], bin_w);
+ if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
+ switch (pfb->zsbuf->format) {
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ case PIPE_FORMAT_Z32_FLOAT:
+ emit.prog = (pfb->zsbuf->format == PIPE_FORMAT_Z32_FLOAT) ?
+ &ctx->blit_z : &ctx->blit_zs;
+ emit.key.half_precision = false;
+
+ OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
+ OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_Z_ENABLE |
+ A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE |
+ A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS) |
+ A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE);
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
+ OUT_RING(ring, A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE);
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
+ OUT_RING(ring, 0x80000); /* GRAS_CL_CLIP_CNTL */
+
+ break;
+ default:
+ /* Non-float can use a regular color write. It's split over 8-bit
+ * components, so half precision is always sufficient.
+ */
+ emit.prog = &ctx->blit_prog[0];
+ emit.key.half_precision = true;
+ break;
+ }
+ emit.fp = NULL; /* frag shader changed so clear cache */
+ fd4_program_emit(ring, &emit, 1, &pfb->zsbuf);
+ emit_mem2gmem_surf(ctx, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w);
+ }
OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
@@ -534,21 +611,35 @@ fd4_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile)
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd_gmem_stateobj *gmem = &ctx->gmem;
- uint32_t reg;
- OUT_PKT0(ring, REG_A4XX_RB_DEPTH_INFO, 3);
- reg = A4XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base(ctx));
if (pfb->zsbuf) {
- reg |= A4XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd4_pipe2depth(pfb->zsbuf->format));
- }
- OUT_RING(ring, reg);
- if (pfb->zsbuf) {
- uint32_t cpp = util_format_get_blocksize(pfb->zsbuf->format);
+ struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
+ uint32_t cpp = rsc->cpp;
+
+ OUT_PKT0(ring, REG_A4XX_RB_DEPTH_INFO, 3);
+ OUT_RING(ring, A4XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]) |
+ A4XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd4_pipe2depth(pfb->zsbuf->format)));
OUT_RING(ring, A4XX_RB_DEPTH_PITCH(cpp * gmem->bin_w));
OUT_RING(ring, A4XX_RB_DEPTH_PITCH2(cpp * gmem->bin_w));
+
+ OUT_PKT0(ring, REG_A4XX_RB_STENCIL_INFO, 2);
+ if (rsc->stencil) {
+ OUT_RING(ring, A4XX_RB_STENCIL_INFO_SEPARATE_STENCIL |
+ A4XX_RB_STENCIL_INFO_STENCIL_BASE(gmem->zsbuf_base[1]));
+ OUT_RING(ring, A4XX_RB_STENCIL_PITCH(rsc->stencil->cpp * gmem->bin_w));
+ } else {
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ }
} else {
+ OUT_PKT0(ring, REG_A4XX_RB_DEPTH_INFO, 3);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A4XX_RB_STENCIL_INFO, 2);
+ OUT_RING(ring, 0); /* RB_STENCIL_INFO */
+ OUT_RING(ring, 0); /* RB_STENCIL_PITCH */
}
OUT_PKT0(ring, REG_A4XX_GRAS_DEPTH_CONTROL, 1);
@@ -586,7 +677,7 @@ fd4_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile)
OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1));
OUT_RING(ring, CP_SET_BIN_2_X2(x2) | CP_SET_BIN_2_Y2(y2));
- emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, gmem->bin_w);
+ emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem->cbuf_base, gmem->bin_w);
/* setup scissor/offset for current tile: */
OUT_PKT0(ring, REG_A4XX_RB_BIN_OFFSET, 1);
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c
index e8f5837f7ce..1a6d0142132 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c
@@ -31,8 +31,6 @@
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
-#include "tgsi/tgsi_dump.h"
-#include "tgsi/tgsi_parse.h"
#include "freedreno_program.h"
@@ -53,7 +51,7 @@ create_shader_stateobj(struct pipe_context *pctx, const struct pipe_shader_state
enum shader_t type)
{
struct fd4_shader_stateobj *so = CALLOC_STRUCT(fd4_shader_stateobj);
- so->shader = ir3_shader_create(pctx, cso->tokens, type);
+ so->shader = ir3_shader_create(pctx, cso, type);
return so;
}
@@ -213,14 +211,17 @@ setup_stages(struct fd4_emit *emit, struct stage *s)
}
void
-fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit)
+fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
+ int nr, struct pipe_surface **bufs)
{
struct stage s[MAX_STAGES];
- uint32_t pos_regid, posz_regid, psize_regid, color_regid;
+ uint32_t pos_regid, posz_regid, psize_regid, color_regid[8];
uint32_t face_regid, coord_regid, zwcoord_regid;
int constmode;
int i, j, k;
+ debug_assert(nr <= ARRAY_SIZE(color_regid));
+
setup_stages(emit, s);
/* blob seems to always use constmode currently: */
@@ -232,11 +233,30 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit)
ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
psize_regid = ir3_find_output_regid(s[VS].v,
ir3_semantic_name(TGSI_SEMANTIC_PSIZE, 0));
- color_regid = ir3_find_output_regid(s[FS].v,
- ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
+ if (s[FS].v->color0_mrt) {
+ color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
+ color_regid[4] = color_regid[5] = color_regid[6] = color_regid[7] =
+ ir3_find_output_regid(s[FS].v, ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
+ } else {
+ const struct ir3_shader_variant *fp = s[FS].v;
+ memset(color_regid, 0, sizeof(color_regid));
+ for (i = 0; i < fp->outputs_count; i++) {
+ ir3_semantic sem = fp->outputs[i].semantic;
+ unsigned idx = sem2idx(sem);
+ if (sem2name(sem) != TGSI_SEMANTIC_COLOR)
+ continue;
+ debug_assert(idx < ARRAY_SIZE(color_regid));
+ color_regid[idx] = fp->outputs[i].regid;
+ }
+ }
+
+ /* adjust regids for alpha output formats: there is no alpha render
+ * format, so alpha is stored as red and the regid is bumped to the
+ * shader's .w component
+ */
+ for (i = 0; i < nr; i++)
+ if (util_format_is_alpha(pipe_surface_format(bufs[i])))
+ color_regid[i] += 3;
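/* Sketch of the regid arithmetic assumed by the "+= 3" above: a regid
 * addresses a single scalar component, so adding 3 moves from the .x to
 * the .w (alpha) component of the same vec4 output. example_regid() is a
 * hypothetical stand-in mirroring ir3's regid() helper:
 */
static inline uint32_t
example_regid(int num, int comp)
{
	return (num << 2) | (comp & 0x3);  /* register number in the high bits, component in the low two */
}
/* e.g. example_regid(2, 0) + 3 == example_regid(2, 3): r2.x -> r2.w */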
- if (util_format_is_alpha(emit->pformat))
- color_regid += 3;
/* TODO get these dynamically: */
face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0);
@@ -419,29 +439,24 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit)
A4XX_RB_RENDER_CONTROL2_WCOORD));
OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT_REG, 1);
- OUT_RING(ring, A4XX_RB_FS_OUTPUT_REG_MRT(1) |
+ OUT_RING(ring, A4XX_RB_FS_OUTPUT_REG_MRT(MAX2(1, nr)) |
COND(s[FS].v->writes_pos, A4XX_RB_FS_OUTPUT_REG_FRAG_WRITES_Z));
OUT_PKT0(ring, REG_A4XX_SP_FS_OUTPUT_REG, 1);
- if (s[FS].v->writes_pos) {
- OUT_RING(ring, 0x00000001 |
- A4XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE |
- A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid));
- } else {
- OUT_RING(ring, 0x00000001);
- }
+ OUT_RING(ring, A4XX_SP_FS_OUTPUT_REG_MRT(MAX2(1, nr)) |
+ COND(s[FS].v->writes_pos, A4XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE) |
+ A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid));
OUT_PKT0(ring, REG_A4XX_SP_FS_MRT_REG(0), 8);
- OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(color_regid) |
- A4XX_SP_FS_MRT_REG_MRTFORMAT(emit->format) |
- COND(emit->key.half_precision, A4XX_SP_FS_MRT_REG_HALF_PRECISION));
- OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
- OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
- OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
- OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
- OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
- OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
- OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
+ for (i = 0; i < 8; i++) {
+ enum a4xx_color_fmt format = 0;
+ if (i < nr)
+ format = fd4_emit_format(bufs[i]);
+ OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(color_regid[i]) |
+ A4XX_SP_FS_MRT_REG_MRTFORMAT(format) |
+ COND(emit->key.half_precision,
+ A4XX_SP_FS_MRT_REG_HALF_PRECISION));
+ }
if (emit->key.binning_pass) {
OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2);
@@ -450,10 +465,10 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit)
COND(s[VS].v->writes_psize, A4XX_VPC_ATTR_PSIZE));
OUT_RING(ring, 0x00000000);
} else {
- uint32_t vinterp[8], flatshade[2];
+ uint32_t vinterp[8], vpsrepl[8];
memset(vinterp, 0, sizeof(vinterp));
- memset(flatshade, 0, sizeof(flatshade));
+ memset(vpsrepl, 0, sizeof(vpsrepl));
/* looks like we need to do int varyings in the frag
* shader on a4xx (no flatshad reg? or a420.0 bug?):
@@ -470,29 +485,40 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit)
* something like the code below instead of workaround
* in the shader:
*/
-#if 0
- /* figure out VARYING_INTERP / FLAT_SHAD register values: */
+ /* figure out VARYING_INTERP / VARYING_PS_REPL register values: */
for (j = -1; (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count; ) {
uint32_t interp = s[FS].v->inputs[j].interpolate;
+
+ /* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG
+ * instead.. rather than -8 everywhere else..
+ */
+ uint32_t inloc = s[FS].v->inputs[j].inloc - 8;
+
+ /* currently assuming varyings aligned to 4 (not
+ * packed):
+ */
+ debug_assert((inloc % 4) == 0);
+
if ((interp == TGSI_INTERPOLATE_CONSTANT) ||
((interp == TGSI_INTERPOLATE_COLOR) && emit->rasterflat)) {
- /* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG
- * instead.. rather than -8 everywhere else..
- */
- uint32_t loc = s[FS].v->inputs[j].inloc - 8;
-
- /* currently assuming varyings aligned to 4 (not
- * packed):
- */
- debug_assert((loc % 4) == 0);
+ uint32_t loc = inloc;
for (i = 0; i < 4; i++, loc++) {
vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
- flatshade[loc / 32] |= 1 << (loc % 32);
+ //flatshade[loc / 32] |= 1 << (loc % 32);
}
}
+
+ /* Replace the .xy coordinates with S/T from the point sprite. Set
+ * the interpolation bits for .zw such that they become (0, 1).
+ */
+ if (emit->sprite_coord_enable & (1 << sem2idx(s[FS].v->inputs[j].semantic))) {
+ vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09)
+ << ((inloc % 16) * 2);
+ vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2);
+ vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2);
+ }
}
-#endif
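/* Sketch of the packing used by the loop above, inferred from the code
 * rather than from documentation: every varying component gets a 2-bit
 * field, 16 components per 32-bit word, across the 8 VARYING_INTERP /
 * VARYING_PS_REPL words:
 */
static inline void
set_varying_bits(uint32_t words[8], unsigned loc, uint32_t bits)
{
	words[loc / 16] |= bits << ((loc % 16) * 2);  /* 2 bits per component */
}
/* For vinterp, 1 appears to mean flat-shaded while 2 and 3 force the
 * component to 0.0 and 1.0 (hence .zw -> (0, 1) for point sprites); for
 * vpsrepl, 0x09/0x0d span the two fields for .xy, selecting (S, T) or the
 * flipped-T variant when sprite_coord_mode is set.
 */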
OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2);
OUT_RING(ring, A4XX_VPC_ATTR_TOTALATTR(s[FS].v->total_in) |
@@ -509,7 +535,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit)
OUT_PKT0(ring, REG_A4XX_VPC_VARYING_PS_REPL_MODE(0), 8);
for (i = 0; i < 8; i++)
- OUT_RING(ring, s[FS].v->shader->vpsrepl[i]); /* VPC_VARYING_PS_REPL[i] */
+ OUT_RING(ring, vpsrepl[i]); /* VPC_VARYING_PS_REPL[i] */
}
if (s[VS].instrlen)
@@ -520,19 +546,6 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit)
emit_shader(ring, s[FS].v);
}
-/* hack.. until we figure out how to deal w/ vpsrepl properly.. */
-static void
-fix_blit_fp(struct pipe_context *pctx)
-{
- struct fd_context *ctx = fd_context(pctx);
- struct fd4_shader_stateobj *so = ctx->blit_prog[0].fp;
-
- so->shader->vpsrepl[0] = 0x99999999;
- so->shader->vpsrepl[1] = 0x99999999;
- so->shader->vpsrepl[2] = 0x99999999;
- so->shader->vpsrepl[3] = 0x99999999;
-}
-
void
fd4_prog_init(struct pipe_context *pctx)
{
@@ -543,6 +556,4 @@ fd4_prog_init(struct pipe_context *pctx)
pctx->delete_vs_state = fd4_vp_state_delete;
fd_prog_init(pctx);
-
- fix_blit_fp(pctx);
}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.h b/src/gallium/drivers/freedreno/a4xx/fd4_program.h
index 52306a4c60d..8dfccaf9d74 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_program.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.h
@@ -39,7 +39,8 @@ struct fd4_shader_stateobj {
struct fd4_emit;
-void fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit);
+void fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
+ int nr, struct pipe_surface **bufs);
void fd4_prog_init(struct pipe_context *pctx);
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_query.c b/src/gallium/drivers/freedreno/a4xx/fd4_query.c
index 6db1c11b94b..4f69e0c1694 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_query.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_query.c
@@ -31,9 +31,93 @@
#include "freedreno_util.h"
#include "fd4_query.h"
+#include "fd4_draw.h"
#include "fd4_format.h"
+
+struct fd_rb_samp_ctrs {
+ uint64_t ctr[16];
+};
+
+/*
+ * Occlusion Query:
+ *
+ * OCCLUSION_COUNTER and OCCLUSION_PREDICATE differ only in how they
+ * interpret results
+ */
+
+static struct fd_hw_sample *
+occlusion_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
+{
+ struct fd_hw_sample *samp =
+ fd_hw_sample_init(ctx, sizeof(struct fd_rb_samp_ctrs));
+
+ /* low bits of sample addr should be zero (since they are control
+ * flags in RB_SAMPLE_COUNT_CONTROL):
+ */
+ debug_assert((samp->offset & 0x3) == 0);
+
+ /* Set RB_SAMPLE_COUNT_ADDR to samp->offset plus the value of the
+ * HW_QUERY_BASE_REG register:
+ */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A4XX_RB_SAMPLE_COUNT_CONTROL) | 0x80000000);
+ OUT_RING(ring, HW_QUERY_BASE_REG);
+ OUT_RING(ring, A4XX_RB_SAMPLE_COUNT_CONTROL_COPY |
+ samp->offset);
+
+ OUT_PKT3(ring, CP_DRAW_INDX_OFFSET, 3);
+ OUT_RING(ring, DRAW4(DI_PT_POINTLIST_PSIZE, DI_SRC_SEL_AUTO_INDEX,
+ INDEX4_SIZE_32_BIT, USE_VISIBILITY));
+ OUT_RING(ring, 1); /* NumInstances */
+ OUT_RING(ring, 0); /* NumIndices */
+
+ fd_event_write(ctx, ring, ZPASS_DONE);
+
+ return samp;
+}
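/* How the pieces fit together: ZPASS_DONE makes the RB dump its sample
 * counters to the address programmed above. The hw-query core records one
 * such sample when the query starts and another when it ends, and the
 * accumulate_result() callbacks below sum the start/end deltas via
 * count_samples().
 */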
+
+static uint64_t
+count_samples(const struct fd_rb_samp_ctrs *start,
+ const struct fd_rb_samp_ctrs *end)
+{
+ return end->ctr[0] - start->ctr[0];
+}
+
+static void
+occlusion_counter_accumulate_result(struct fd_context *ctx,
+ const void *start, const void *end,
+ union pipe_query_result *result)
+{
+ uint64_t n = count_samples(start, end);
+ result->u64 += n;
+}
+
+static void
+occlusion_predicate_accumulate_result(struct fd_context *ctx,
+ const void *start, const void *end,
+ union pipe_query_result *result)
+{
+ uint64_t n = count_samples(start, end);
+ result->b |= (n > 0);
+}
+
+static const struct fd_hw_sample_provider occlusion_counter = {
+ .query_type = PIPE_QUERY_OCCLUSION_COUNTER,
+ .active = FD_STAGE_DRAW,
+ .get_sample = occlusion_get_sample,
+ .accumulate_result = occlusion_counter_accumulate_result,
+};
+
+static const struct fd_hw_sample_provider occlusion_predicate = {
+ .query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
+ .active = FD_STAGE_DRAW,
+ .get_sample = occlusion_get_sample,
+ .accumulate_result = occlusion_predicate_accumulate_result,
+};
+
void fd4_query_context_init(struct pipe_context *pctx)
{
- /* TODO */
+ fd_hw_query_register_provider(pctx, &occlusion_counter);
+ fd_hw_query_register_provider(pctx, &occlusion_predicate);
}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
index e54b606a285..dc7e98b149d 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
@@ -50,7 +50,7 @@ fd4_rasterizer_state_create(struct pipe_context *pctx,
if (cso->point_size_per_vertex) {
psize_min = util_get_min_point_size(cso);
- psize_max = 8192;
+ psize_max = 4092;
} else {
/* Force the point size to be as if the vertex output was disabled. */
psize_min = cso->point_size;
@@ -67,9 +67,9 @@ fd4_rasterizer_state_create(struct pipe_context *pctx,
*/
so->gras_cl_clip_cntl = 0x80000; /* ??? */
so->gras_su_point_minmax =
- A4XX_GRAS_SU_POINT_MINMAX_MIN(psize_min/2) |
- A4XX_GRAS_SU_POINT_MINMAX_MAX(psize_max/2);
- so->gras_su_point_size = A4XX_GRAS_SU_POINT_SIZE(cso->point_size/2);
+ A4XX_GRAS_SU_POINT_MINMAX_MIN(psize_min) |
+ A4XX_GRAS_SU_POINT_MINMAX_MAX(psize_max);
+ so->gras_su_point_size = A4XX_GRAS_SU_POINT_SIZE(cso->point_size);
so->gras_su_poly_offset_scale =
A4XX_GRAS_SU_POLY_OFFSET_SCALE(cso->offset_scale);
so->gras_su_poly_offset_offset =
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h
index 06c728f2f1f..64e81a9983b 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h
@@ -44,7 +44,7 @@ struct fd4_rasterizer_stateobj {
uint32_t pc_prim_vtx_cntl;
};
-static INLINE struct fd4_rasterizer_stateobj *
+static inline struct fd4_rasterizer_stateobj *
fd4_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
{
return (struct fd4_rasterizer_stateobj *)rast;
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_screen.c b/src/gallium/drivers/freedreno/a4xx/fd4_screen.c
index e8cbb2d201a..d8ea414f300 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_screen.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_screen.c
@@ -102,7 +102,7 @@ void
fd4_screen_init(struct pipe_screen *pscreen)
{
struct fd_screen *screen = fd_screen(pscreen);
- screen->max_rts = 1;
+ screen->max_rts = A4XX_MAX_RENDER_TARGETS;
screen->compiler = ir3_compiler_create(screen->gpu_id);
pscreen->context_create = fd4_context_create;
pscreen->is_format_supported = fd4_screen_is_format_supported;
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
index 6ba25d0816d..d2bc5fee6c0 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
@@ -150,8 +150,8 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
{
struct fd4_pipe_sampler_view *so = CALLOC_STRUCT(fd4_pipe_sampler_view);
struct fd_resource *rsc = fd_resource(prsc);
- unsigned lvl = cso->u.tex.first_level;
- unsigned miplevels = cso->u.tex.last_level - lvl;
+ unsigned lvl = fd_sampler_first_level(cso);
+ unsigned miplevels = fd_sampler_last_level(cso) - lvl;
if (!so)
return NULL;
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.h b/src/gallium/drivers/freedreno/a4xx/fd4_texture.h
index 579ed87f14b..84ee7ecb50c 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.h
@@ -42,7 +42,7 @@ struct fd4_sampler_stateobj {
uint32_t texsamp0, texsamp1;
};
-static INLINE struct fd4_sampler_stateobj *
+static inline struct fd4_sampler_stateobj *
fd4_sampler_stateobj(struct pipe_sampler_state *samp)
{
return (struct fd4_sampler_stateobj *)samp;
@@ -53,7 +53,7 @@ struct fd4_pipe_sampler_view {
uint32_t texconst0, texconst1, texconst2, texconst3, textconst4;
};
-static INLINE struct fd4_pipe_sampler_view *
+static inline struct fd4_pipe_sampler_view *
fd4_pipe_sampler_view(struct pipe_sampler_view *pview)
{
return (struct fd4_pipe_sampler_view *)pview;
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_zsa.h b/src/gallium/drivers/freedreno/a4xx/fd4_zsa.h
index 033317cf620..6a92a9b6785 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_zsa.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_zsa.h
@@ -47,7 +47,7 @@ struct fd4_zsa_stateobj {
uint32_t rb_stencilrefmask_bf;
};
-static INLINE struct fd4_zsa_stateobj *
+static inline struct fd4_zsa_stateobj *
fd4_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa)
{
return (struct fd4_zsa_stateobj *)zsa;
diff --git a/src/gallium/drivers/freedreno/adreno_common.xml.h b/src/gallium/drivers/freedreno/adreno_common.xml.h
index b23aa830770..00b6acba065 100644
--- a/src/gallium/drivers/freedreno/adreno_common.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_common.xml.h
@@ -8,15 +8,15 @@ http://github.com/freedreno/envytools/
git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2013-11-30 14:47:15)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2014-11-13 22:44:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14895 bytes, from 2015-04-19 15:23:28)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-04-12 18:16:35)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 59314 bytes, from 2015-04-19 16:21:40)
-
-Copyright (C) 2013-2014 by the following authors:
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63728 bytes, from 2015-08-05 18:07:28)
+
+Copyright (C) 2013-2015 by the following authors:
- Rob Clark <[email protected]> (robclark)
Permission is hereby granted, free of charge, to any person obtaining
diff --git a/src/gallium/drivers/freedreno/adreno_pm4.xml.h b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
index 2b24c5b4e78..98a90e26679 100644
--- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
@@ -8,13 +8,13 @@ http://github.com/freedreno/envytools/
git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2013-11-30 14:47:15)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2014-11-13 22:44:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14895 bytes, from 2015-04-19 15:23:28)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-04-12 18:16:35)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 59314 bytes, from 2015-04-19 16:21:40)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63728 bytes, from 2015-08-05 18:07:28)
Copyright (C) 2013-2015 by the following authors:
- Rob Clark <[email protected]> (robclark)
@@ -67,7 +67,7 @@ enum vgt_event_type {
enum pc_di_primtype {
DI_PT_NONE = 0,
- DI_PT_POINTLIST_A2XX = 1,
+ DI_PT_POINTLIST_PSIZE = 1,
DI_PT_LINELIST = 2,
DI_PT_LINESTRIP = 3,
DI_PT_TRILIST = 4,
@@ -75,7 +75,7 @@ enum pc_di_primtype {
DI_PT_TRISTRIP = 6,
DI_PT_LINELOOP = 7,
DI_PT_RECTLIST = 8,
- DI_PT_POINTLIST_A3XX = 9,
+ DI_PT_POINTLIST = 9,
DI_PT_LINE_ADJ = 10,
DI_PT_LINESTRIP_ADJ = 11,
DI_PT_TRI_ADJ = 12,
diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c
index 668ef3629bf..8e6d43150ce 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.c
+++ b/src/gallium/drivers/freedreno/freedreno_context.c
@@ -94,9 +94,7 @@ void
fd_context_render(struct pipe_context *pctx)
{
struct fd_context *ctx = fd_context(pctx);
- struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd_resource *rsc, *rsc_tmp;
- int i;
DBG("needs_flush: %d", ctx->needs_flush);
@@ -118,20 +116,11 @@ fd_context_render(struct pipe_context *pctx)
ctx->gmem_reason = 0;
ctx->num_draws = 0;
- for (i = 0; i < pfb->nr_cbufs; i++)
- if (pfb->cbufs[i])
- fd_resource(pfb->cbufs[i]->texture)->dirty = false;
- if (pfb->zsbuf) {
- rsc = fd_resource(pfb->zsbuf->texture);
- rsc->dirty = false;
- if (rsc->stencil)
- rsc->stencil->dirty = false;
- }
-
/* go through all the used resources and clear their reading flag */
LIST_FOR_EACH_ENTRY_SAFE(rsc, rsc_tmp, &ctx->used_resources, list) {
- assert(rsc->reading);
- rsc->reading = false;
+ debug_assert(rsc->status != 0);
+ rsc->status = 0;
+ rsc->pending_ctx = NULL;
list_delinit(&rsc->list);
}
@@ -144,8 +133,10 @@ fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
{
fd_context_render(pctx);
- if (fence)
+ if (fence) {
+ fd_screen_fence_ref(pctx->screen, fence, NULL);
*fence = fd_fence_create(pctx);
+ }
}
void
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index e420f1e5bd9..509a90fdf23 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -82,6 +82,20 @@ struct fd_vertex_stateobj {
unsigned num_elements;
};
+struct fd_streamout_stateobj {
+ struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
+ unsigned num_targets;
+ /* Track offset from vtxcnt for streamout data. This counter is
+ * just incremented by the number of vertices on each draw, until
+ * it is reset or a new streamout buffer is bound.
+ *
+ * When we eventually have GS, the CPU won't actually know the
+ * number of vertices per draw, so I think we'll have to do
+ * something more clever.
+ */
+ unsigned offsets[PIPE_MAX_SO_BUFFERS];
+};
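/* A note on the offsets, based on fd_set_stream_output_targets() further
 * below: gallium passes offset == -1 to mean "append", in which case an
 * unchanged target keeps its saved counter; any other (re)bind resets the
 * counter to 0.
 */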
+
/* group together the vertex and vertexbuf state.. for ease of passing
* around, and because various internal operations (gmem<->mem, etc)
* need their own vertex state:
@@ -179,7 +193,7 @@ struct fd_context {
struct fd_program_stateobj solid_prog; // TODO move to screen?
/* shaders used by mem->gmem blits: */
- struct fd_program_stateobj blit_prog[8]; // TODO move to screen?
+ struct fd_program_stateobj blit_prog[MAX_RENDER_TARGETS]; // TODO move to screen?
struct fd_program_stateobj blit_z, blit_zs;
/* do we need to mem2gmem before rendering. We don't, if for example,
@@ -319,6 +333,7 @@ struct fd_context {
FD_DIRTY_VTXBUF = (1 << 15),
FD_DIRTY_INDEXBUF = (1 << 16),
FD_DIRTY_SCISSOR = (1 << 17),
+ FD_DIRTY_STREAMOUT = (1 << 18),
} dirty;
struct pipe_blend_state *blend;
@@ -339,6 +354,7 @@ struct fd_context {
struct pipe_viewport_state viewport;
struct fd_constbuf_stateobj constbuf[PIPE_SHADER_TYPES];
struct pipe_index_buffer indexbuf;
+ struct fd_streamout_stateobj streamout;
/* GMEM/tile handling fxns: */
void (*emit_tile_init)(struct fd_context *ctx);
@@ -351,18 +367,25 @@ struct fd_context {
void (*emit_sysmem_prep)(struct fd_context *ctx);
/* draw: */
- void (*draw_vbo)(struct fd_context *pctx, const struct pipe_draw_info *info);
+ void (*draw_vbo)(struct fd_context *ctx, const struct pipe_draw_info *info);
void (*clear)(struct fd_context *ctx, unsigned buffers,
const union pipe_color_union *color, double depth, unsigned stencil);
+
+ /* constant emit: (note currently not used/needed for a2xx) */
+ void (*emit_const)(struct fd_ringbuffer *ring, enum shader_t type,
+ uint32_t regid, uint32_t offset, uint32_t sizedwords,
+ const uint32_t *dwords, struct pipe_resource *prsc);
+ void (*emit_const_bo)(struct fd_ringbuffer *ring, enum shader_t type, boolean write,
+ uint32_t regid, uint32_t num, struct fd_bo **bos, uint32_t *offsets);
};
-static INLINE struct fd_context *
+static inline struct fd_context *
fd_context(struct pipe_context *pctx)
{
return (struct fd_context *)pctx;
}
-static INLINE struct pipe_scissor_state *
+static inline struct pipe_scissor_state *
fd_context_get_scissor(struct fd_context *ctx)
{
if (ctx->rasterizer && ctx->rasterizer->scissor)
@@ -370,13 +393,13 @@ fd_context_get_scissor(struct fd_context *ctx)
return &ctx->disabled_scissor;
}
-static INLINE bool
+static inline bool
fd_supported_prim(struct fd_context *ctx, unsigned prim)
{
return (1 << prim) & ctx->primtype_mask;
}
-static INLINE void
+static inline void
fd_reset_wfi(struct fd_context *ctx)
{
ctx->needs_wfi = true;
diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c
index c9e317c7dc9..6831a58749c 100644
--- a/src/gallium/drivers/freedreno/freedreno_draw.c
+++ b/src/gallium/drivers/freedreno/freedreno_draw.c
@@ -40,7 +40,8 @@
#include "freedreno_util.h"
static void
-resource_reading(struct fd_context *ctx, struct pipe_resource *prsc)
+resource_used(struct fd_context *ctx, struct pipe_resource *prsc,
+ enum fd_resource_status status)
{
struct fd_resource *rsc;
@@ -48,9 +49,29 @@ resource_reading(struct fd_context *ctx, struct pipe_resource *prsc)
return;
rsc = fd_resource(prsc);
- rsc->reading = true;
+ rsc->status |= status;
+ if (rsc->stencil)
+ rsc->stencil->status |= status;
+
+ /* TODO resources can actually be shared across contexts,
+ * so I'm not sure a single list-head will do the trick?
+ */
+ debug_assert((rsc->pending_ctx == ctx) || !rsc->pending_ctx);
list_delinit(&rsc->list);
list_addtail(&rsc->list, &ctx->used_resources);
+ rsc->pending_ctx = ctx;
+}
+
+static void
+resource_read(struct fd_context *ctx, struct pipe_resource *prsc)
+{
+ resource_used(ctx, prsc, FD_PENDING_READ);
+}
+
+static void
+resource_written(struct fd_context *ctx, struct pipe_resource *prsc)
+{
+ resource_used(ctx, prsc, FD_PENDING_WRITE);
}
static void
@@ -59,7 +80,7 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
struct fd_context *ctx = fd_context(pctx);
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
- unsigned i, buffers = 0;
+ unsigned i, prims, buffers = 0;
/* if we supported transform feedback, we'd have to disable this: */
if (((scissor->maxx - scissor->minx) *
@@ -69,6 +90,8 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
/* emulate unsupported primitives: */
if (!fd_supported_prim(ctx, info->mode)) {
+ if (ctx->streamout.num_targets > 0)
+ debug_error("stream-out with emulated prims");
util_primconvert_save_index_buffer(ctx->primconvert, &ctx->indexbuf);
util_primconvert_save_rasterizer_state(ctx->primconvert, ctx->rasterizer);
util_primconvert_draw_vbo(ctx->primconvert, info);
@@ -83,17 +106,13 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
if (fd_depth_enabled(ctx)) {
buffers |= FD_BUFFER_DEPTH;
- fd_resource(pfb->zsbuf->texture)->dirty = true;
+ resource_written(ctx, pfb->zsbuf->texture);
ctx->gmem_reason |= FD_GMEM_DEPTH_ENABLED;
}
if (fd_stencil_enabled(ctx)) {
- struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
buffers |= FD_BUFFER_STENCIL;
- if (rsc->stencil)
- rsc->stencil->dirty = true;
- else
- rsc->dirty = true;
+ resource_written(ctx, pfb->zsbuf->texture);
ctx->gmem_reason |= FD_GMEM_STENCIL_ENABLED;
}
@@ -108,7 +127,7 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
surf = pfb->cbufs[i]->texture;
- fd_resource(surf)->dirty = true;
+ resource_written(ctx, surf);
buffers |= PIPE_CLEAR_COLOR0 << i;
if (surf->nr_samples > 1)
@@ -120,32 +139,38 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
/* Skip over buffer 0, that is sent along with the command stream */
for (i = 1; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
- resource_reading(ctx, ctx->constbuf[PIPE_SHADER_VERTEX].cb[i].buffer);
- resource_reading(ctx, ctx->constbuf[PIPE_SHADER_FRAGMENT].cb[i].buffer);
+ resource_read(ctx, ctx->constbuf[PIPE_SHADER_VERTEX].cb[i].buffer);
+ resource_read(ctx, ctx->constbuf[PIPE_SHADER_FRAGMENT].cb[i].buffer);
}
/* Mark VBOs as being read */
for (i = 0; i < ctx->vtx.vertexbuf.count; i++) {
assert(!ctx->vtx.vertexbuf.vb[i].user_buffer);
- resource_reading(ctx, ctx->vtx.vertexbuf.vb[i].buffer);
+ resource_read(ctx, ctx->vtx.vertexbuf.vb[i].buffer);
}
/* Mark index buffer as being read */
- resource_reading(ctx, ctx->indexbuf.buffer);
+ resource_read(ctx, ctx->indexbuf.buffer);
/* Mark textures as being read */
for (i = 0; i < ctx->verttex.num_textures; i++)
if (ctx->verttex.textures[i])
- resource_reading(ctx, ctx->verttex.textures[i]->texture);
+ resource_read(ctx, ctx->verttex.textures[i]->texture);
for (i = 0; i < ctx->fragtex.num_textures; i++)
if (ctx->fragtex.textures[i])
- resource_reading(ctx, ctx->fragtex.textures[i]->texture);
+ resource_read(ctx, ctx->fragtex.textures[i]->texture);
+
+ /* Mark streamout buffers as being written.. */
+ for (i = 0; i < ctx->streamout.num_targets; i++)
+ if (ctx->streamout.targets[i])
+ resource_written(ctx, ctx->streamout.targets[i]->buffer);
ctx->num_draws++;
+ prims = u_reduced_prims_for_vertices(info->mode, info->count);
+
ctx->stats.draw_calls++;
- ctx->stats.prims_emitted +=
- u_reduced_prims_for_vertices(info->mode, info->count);
+ ctx->stats.prims_emitted += prims;
/* any buffers that haven't been cleared yet, we need to restore: */
ctx->restore |= buffers & (FD_BUFFER_ALL & ~ctx->cleared);
@@ -159,6 +184,9 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_DRAW);
ctx->draw_vbo(ctx, info);
+ for (i = 0; i < ctx->streamout.num_targets; i++)
+ ctx->streamout.offsets[i] += prims;
+
/* if an app (or, well, piglit test) does many thousands of draws
* without flush (or anything which implicitly flushes, like
* changing render targets), we can exceed the ringbuffer size.
@@ -216,15 +244,10 @@ fd_clear(struct pipe_context *pctx, unsigned buffers,
if (buffers & PIPE_CLEAR_COLOR)
for (i = 0; i < pfb->nr_cbufs; i++)
if (buffers & (PIPE_CLEAR_COLOR0 << i))
- fd_resource(pfb->cbufs[i]->texture)->dirty = true;
+ resource_written(ctx, pfb->cbufs[i]->texture);
if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
- struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
- if (rsc->stencil && buffers & PIPE_CLEAR_STENCIL)
- rsc->stencil->dirty = true;
- if (!rsc->stencil || buffers & PIPE_CLEAR_DEPTH)
- rsc->dirty = true;
-
+ resource_written(ctx, pfb->zsbuf->texture);
ctx->gmem_reason |= FD_GMEM_CLEARS_DEPTH_STENCIL;
}
@@ -242,7 +265,8 @@ fd_clear(struct pipe_context *pctx, unsigned buffers,
FD_DIRTY_SAMPLE_MASK |
FD_DIRTY_PROG |
FD_DIRTY_CONSTBUF |
- FD_DIRTY_BLEND;
+ FD_DIRTY_BLEND |
+ FD_DIRTY_FRAMEBUFFER;
if (fd_mesa_debug & FD_DBG_DCLEAR)
ctx->dirty = 0xffffffff;
diff --git a/src/gallium/drivers/freedreno/freedreno_fence.c b/src/gallium/drivers/freedreno/freedreno_fence.c
index 375e58f7022..04a9feacd58 100644
--- a/src/gallium/drivers/freedreno/freedreno_fence.c
+++ b/src/gallium/drivers/freedreno/freedreno_fence.c
@@ -69,6 +69,9 @@ boolean fd_screen_fence_finish(struct pipe_screen *screen,
struct pipe_fence_handle *fence,
uint64_t timeout)
{
+ if (!timeout)
+ return fd_screen_fence_signalled(screen, fence);
+
if (fd_pipe_wait(fence->screen->pipe, fence->timestamp))
return false;
diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c
index c105378ec4e..648db9baee5 100644
--- a/src/gallium/drivers/freedreno/freedreno_gmem.c
+++ b/src/gallium/drivers/freedreno/freedreno_gmem.c
@@ -82,7 +82,7 @@ total_size(uint8_t cbuf_cpp[], uint8_t zsbuf_cpp[2],
{
uint32_t total = 0, i;
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < MAX_RENDER_TARGETS; i++) {
if (cbuf_cpp[i]) {
gmem->cbuf_base[i] = align(total, 0x4000);
total = gmem->cbuf_base[i] + cbuf_cpp[i] * bin_w * bin_h;
@@ -113,7 +113,7 @@ calculate_tiles(struct fd_context *ctx)
uint32_t nbins_x = 1, nbins_y = 1;
uint32_t bin_w, bin_h;
uint32_t max_width = bin_width(ctx);
- uint8_t cbuf_cpp[4] = {0}, zsbuf_cpp[2] = {0};
+ uint8_t cbuf_cpp[MAX_RENDER_TARGETS] = {0}, zsbuf_cpp[2] = {0};
uint32_t i, j, t, xoff, yoff;
uint32_t tpp_x, tpp_y;
bool has_zs = !!(ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL));
@@ -162,12 +162,17 @@ calculate_tiles(struct fd_context *ctx)
bin_w = align(width / nbins_x, 32);
}
+ if (fd_mesa_debug & FD_DBG_MSGS) {
+ debug_printf("binning input: cbuf cpp:");
+ for (i = 0; i < pfb->nr_cbufs; i++)
+ debug_printf(" %d", cbuf_cpp[i]);
+ debug_printf(", zsbuf cpp: %d; %dx%d\n",
+ zsbuf_cpp[0], width, height);
+ }
+
/* then find a bin width/height that satisfies the memory
* constraints:
*/
- DBG("binning input: cbuf cpp: %d %d %d %d, zsbuf cpp: %d; %dx%d",
- cbuf_cpp[0], cbuf_cpp[1], cbuf_cpp[2], cbuf_cpp[3], zsbuf_cpp[0],
- width, height);
while (total_size(cbuf_cpp, zsbuf_cpp, bin_w, bin_h, gmem) > gmem_size) {
if (bin_w > bin_h) {
nbins_x++;
diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.h b/src/gallium/drivers/freedreno/freedreno_gmem.h
index 5867235db90..38b557eb077 100644
--- a/src/gallium/drivers/freedreno/freedreno_gmem.h
+++ b/src/gallium/drivers/freedreno/freedreno_gmem.h
@@ -31,6 +31,8 @@
#include "pipe/p_context.h"
+#include "freedreno_util.h"
+
/* per-pipe configuration for hw binning: */
struct fd_vsc_pipe {
struct fd_bo *bo;
@@ -47,9 +49,9 @@ struct fd_tile {
struct fd_gmem_stateobj {
struct pipe_scissor_state scissor;
- uint32_t cbuf_base[4];
+ uint32_t cbuf_base[MAX_RENDER_TARGETS];
uint32_t zsbuf_base[2];
- uint8_t cbuf_cpp[4];
+ uint8_t cbuf_cpp[MAX_RENDER_TARGETS];
uint8_t zsbuf_cpp[2];
uint16_t bin_h, nbins_y;
uint16_t bin_w, nbins_x;
diff --git a/src/gallium/drivers/freedreno/freedreno_program.c b/src/gallium/drivers/freedreno/freedreno_program.c
index 5e344e69146..e6a647852a3 100644
--- a/src/gallium/drivers/freedreno/freedreno_program.c
+++ b/src/gallium/drivers/freedreno/freedreno_program.c
@@ -96,7 +96,11 @@ fd_prog_blit(struct pipe_context *pctx, int rts, bool depth)
{
int i;
struct ureg_src tc;
- struct ureg_program *ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+ struct ureg_program *ureg;
+
+ debug_assert(rts <= MAX_RENDER_TARGETS);
+
+ ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
if (!ureg)
return NULL;
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c
index 95f79df565e..709ad4eb55b 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -42,6 +42,14 @@
#include <errno.h>
+
+static bool
+pending(struct fd_resource *rsc, enum fd_resource_status status)
+{
+ return (rsc->status & status) ||
+ (rsc->stencil && (rsc->stencil->status & status));
+}
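/* The rule fd_resource_transfer_map() implements with this helper, as a
 * sketch: a CPU write must order against pending GPU reads and writes,
 * while a CPU read only needs to wait for pending GPU writes:
 *
 *   bool needs_flush = (usage & PIPE_TRANSFER_WRITE)
 *       ? pending(rsc, FD_PENDING_READ | FD_PENDING_WRITE)
 *       : pending(rsc, FD_PENDING_WRITE);
 */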
+
static void
fd_invalidate_resource(struct fd_context *ctx, struct pipe_resource *prsc)
{
@@ -72,11 +80,11 @@ fd_invalidate_resource(struct fd_context *ctx, struct pipe_resource *prsc)
/* Textures */
for (i = 0; i < ctx->verttex.num_textures && !(ctx->dirty & FD_DIRTY_VERTTEX); i++) {
- if (ctx->verttex.textures[i]->texture == prsc)
+ if (ctx->verttex.textures[i] && (ctx->verttex.textures[i]->texture == prsc))
ctx->dirty |= FD_DIRTY_VERTTEX;
}
for (i = 0; i < ctx->fragtex.num_textures && !(ctx->dirty & FD_DIRTY_FRAGTEX); i++) {
- if (ctx->fragtex.textures[i]->texture == prsc)
+ if (ctx->fragtex.textures[i] && (ctx->fragtex.textures[i]->texture == prsc))
ctx->dirty |= FD_DIRTY_FRAGTEX;
}
}
@@ -97,7 +105,8 @@ realloc_bo(struct fd_resource *rsc, uint32_t size)
rsc->bo = fd_bo_new(screen->dev, size, flags);
rsc->timestamp = 0;
- rsc->dirty = rsc->reading = false;
+ rsc->status = 0;
+ rsc->pending_ctx = NULL;
list_delinit(&rsc->list);
util_range_set_empty(&rsc->valid_buffer_range);
}
@@ -238,8 +247,9 @@ fd_resource_transfer_map(struct pipe_context *pctx,
/* If the GPU is writing to the resource, or if it is reading from the
* resource and we're trying to write to it, flush the renders.
*/
- if (rsc->dirty || (rsc->stencil && rsc->stencil->dirty) ||
- ((ptrans->usage & PIPE_TRANSFER_WRITE) && rsc->reading))
+ if (((ptrans->usage & PIPE_TRANSFER_WRITE) &&
+ pending(rsc, FD_PENDING_READ | FD_PENDING_WRITE)) ||
+ pending(rsc, FD_PENDING_WRITE))
fd_context_render(pctx);
/* The GPU keeps track of how the various bo's are being used, and
@@ -646,6 +656,8 @@ fd_blitter_pipe_begin(struct fd_context *ctx)
util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vtx.vertexbuf.vb);
util_blitter_save_vertex_elements(ctx->blitter, ctx->vtx.vtx);
util_blitter_save_vertex_shader(ctx->blitter, ctx->prog.vp);
+ util_blitter_save_so_targets(ctx->blitter, ctx->streamout.num_targets,
+ ctx->streamout.targets);
util_blitter_save_rasterizer(ctx->blitter, ctx->rasterizer);
util_blitter_save_viewport(ctx->blitter, &ctx->viewport);
util_blitter_save_scissor(ctx->blitter, &ctx->scissor);
@@ -675,7 +687,7 @@ fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
{
struct fd_resource *rsc = fd_resource(prsc);
- if (rsc->dirty || (rsc->stencil && rsc->stencil->dirty))
+ if (pending(rsc, FD_PENDING_WRITE | FD_PENDING_READ))
fd_context_render(pctx);
}
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.h b/src/gallium/drivers/freedreno/freedreno_resource.h
index 0634923fcb2..7549becaa1f 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.h
+++ b/src/gallium/drivers/freedreno/freedreno_resource.h
@@ -60,6 +60,15 @@ struct fd_resource_slice {
uint32_t size0; /* size of first layer in slice */
};
+/* Status of queued-up but not yet flushed read and write operations.
+ * In _transfer_map() we need to know whether queued-up rendering needs
+ * to be flushed to preserve the order of CPU and GPU access.
+ */
+enum fd_resource_status {
+ FD_PENDING_WRITE = 0x01,
+ FD_PENDING_READ = 0x02,
+};
+
struct fd_resource {
struct u_resource base;
struct fd_bo *bo;
@@ -68,17 +77,23 @@ struct fd_resource {
uint32_t layer_size;
struct fd_resource_slice slices[MAX_MIP_LEVELS];
uint32_t timestamp;
- bool dirty, reading;
/* buffer range that has been initialized */
struct util_range valid_buffer_range;
/* reference to the resource holding stencil data for a z32_s8 texture */
+ /* TODO rename to secondary or auxiliary? */
struct fd_resource *stencil;
+ /* pending read/write state: */
+ enum fd_resource_status status;
+ /* resources accessed by queued but not flushed draws are tracked
+ * in the used_resources list.
+ */
struct list_head list;
+ struct fd_context *pending_ctx;
};
-static INLINE struct fd_resource *
+static inline struct fd_resource *
fd_resource(struct pipe_resource *ptex)
{
return (struct fd_resource *)ptex;
@@ -89,13 +104,13 @@ struct fd_transfer {
void *staging;
};
-static INLINE struct fd_transfer *
+static inline struct fd_transfer *
fd_transfer(struct pipe_transfer *ptrans)
{
return (struct fd_transfer *)ptrans;
}
-static INLINE struct fd_resource_slice *
+static inline struct fd_resource_slice *
fd_resource_slice(struct fd_resource *rsc, unsigned level)
{
assert(level <= rsc->base.b.last_level);
@@ -103,7 +118,7 @@ fd_resource_slice(struct fd_resource *rsc, unsigned level)
}
/* get offset for specified mipmap level and texture/array layer */
-static INLINE uint32_t
+static inline uint32_t
fd_resource_offset(struct fd_resource *rsc, unsigned level, unsigned layer)
{
struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index b3b5462b437..b55f5b36ca9 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -68,7 +68,8 @@ static const struct debug_named_value debug_options[] = {
{"fraghalf", FD_DBG_FRAGHALF, "Use half-precision in fragment shader"},
{"nobin", FD_DBG_NOBIN, "Disable hw binning"},
{"optmsgs", FD_DBG_OPTMSGS,"Enable optimizer debug messages"},
- {"glsl120", FD_DBG_GLSL120,"Temporary flag to force GLSL 120 (rather than 130) on a3xx+"},
+ {"glsl120", FD_DBG_GLSL120,"Temporary flag to force GLSL 1.20 (rather than 1.30) on a3xx+"},
+ {"shaderdb", FD_DBG_SHADERDB, "Enable shaderdb output"},
DEBUG_NAMED_VALUE_END
};
@@ -163,9 +164,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_BARRIER:
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
case PIPE_CAP_CUBE_MAP_ARRAY:
- case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
- case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
- case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
case PIPE_CAP_START_INSTANCE:
case PIPE_CAP_COMPUTE:
@@ -175,10 +173,23 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_PRIMITIVE_RESTART:
case PIPE_CAP_TGSI_INSTANCEID:
case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
- return is_a3xx(screen) || is_a4xx(screen);
-
case PIPE_CAP_INDEP_BLEND_ENABLE:
case PIPE_CAP_INDEP_BLEND_FUNC:
+ case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+ return is_a3xx(screen) || is_a4xx(screen);
+
+ case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
+ /* ignoring first/last_element.. but I guess that should be
+ * easy to add..
+ */
+ return 0;
+ case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
+ /* I think 32k on a4xx.. and we could possibly emulate more
+ * by pretending 2d/rect textures and splitting high bits
+ * of index into 2nd dimension..
+ */
+ return 16383;
+
case PIPE_CAP_DEPTH_CLIP_DISABLE:
return is_a3xx(screen);
@@ -188,7 +199,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_GLSL_FEATURE_LEVEL:
if (glsl120)
return 120;
- return (is_a3xx(screen) || is_a4xx(screen)) ? 130 : 120;
+ return is_ir3(screen) ? 130 : 120;
/* Unsupported features. */
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
@@ -218,6 +229,10 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
return 0;
case PIPE_CAP_MAX_VIEWPORTS:
@@ -225,9 +240,17 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
/* Stream output. */
case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+ if (is_ir3(screen))
+ return PIPE_MAX_SO_BUFFERS;
+ return 0;
case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+ if (is_ir3(screen))
+ return 1;
+ return 0;
case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+ if (is_ir3(screen))
+ return 16 * 4; /* should only be shader out limit? */
return 0;
/* Geometry shader output, unsupported. */
@@ -258,9 +281,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_QUERY_TIMESTAMP:
return 0;
case PIPE_CAP_OCCLUSION_QUERY:
- /* TODO still missing on a4xx, but we lie to get gl2..
- * it's not a feature, it's a bug!
- */
return is_a3xx(screen) || is_a4xx(screen);
case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
@@ -357,7 +377,7 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
*/
return ((is_a3xx(screen) || is_a4xx(screen)) ? 4096 : 64) * sizeof(float[4]);
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
- return (is_a3xx(screen) || is_a4xx(screen)) ? 16 : 1;
+ return is_ir3(screen) ? 16 : 1;
case PIPE_SHADER_CAP_MAX_PREDS:
return 0; /* nothing uses this */
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
@@ -379,7 +399,7 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_INTEGERS:
if (glsl120)
return 0;
- return (is_a3xx(screen) || is_a4xx(screen)) ? 1 : 0;
+ return is_ir3(screen) ? 1 : 0;
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
return 16;
@@ -546,7 +566,6 @@ fd_screen_create(struct fd_device *dev)
pscreen->get_timestamp = fd_screen_get_timestamp;
pscreen->fence_reference = fd_screen_fence_ref;
- pscreen->fence_signalled = fd_screen_fence_signalled;
pscreen->fence_finish = fd_screen_fence_finish;
util_format_s3tc_init();
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h b/src/gallium/drivers/freedreno/freedreno_screen.h
index dbc2808262a..4e5c3a61958 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.h
+++ b/src/gallium/drivers/freedreno/freedreno_screen.h
@@ -56,7 +56,7 @@ struct fd_screen {
int64_t cpu_gpu_time_delta;
};
-static INLINE struct fd_screen *
+static inline struct fd_screen *
fd_screen(struct pipe_screen *pscreen)
{
return (struct fd_screen *)pscreen;
@@ -73,6 +73,7 @@ struct fd_bo * fd_screen_bo_from_handle(struct pipe_screen *pscreen,
struct pipe_screen * fd_screen_create(struct fd_device *dev);
/* is a3xx patch revision 0? */
+/* TODO a306.0 probably doesn't need this.. be more clever?? */
static inline boolean
is_a3xx_p0(struct fd_screen *screen)
{
@@ -91,4 +92,11 @@ is_a4xx(struct fd_screen *screen)
return (screen->gpu_id >= 400) && (screen->gpu_id < 500);
}
+/* is it using the ir3 compiler (shader isa introduced with a3xx)? */
+static inline boolean
+is_ir3(struct fd_screen *screen)
+{
+ return is_a3xx(screen) || is_a4xx(screen);
+}
+
#endif /* FREEDRENO_SCREEN_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c
index 77aa4f21d3b..7bf8bdb4507 100644
--- a/src/gallium/drivers/freedreno/freedreno_state.c
+++ b/src/gallium/drivers/freedreno/freedreno_state.c
@@ -300,6 +300,67 @@ fd_vertex_state_bind(struct pipe_context *pctx, void *hwcso)
ctx->dirty |= FD_DIRTY_VTXSTATE;
}
+static struct pipe_stream_output_target *
+fd_create_stream_output_target(struct pipe_context *pctx,
+ struct pipe_resource *prsc, unsigned buffer_offset,
+ unsigned buffer_size)
+{
+ struct pipe_stream_output_target *target;
+
+ target = CALLOC_STRUCT(pipe_stream_output_target);
+ if (!target)
+ return NULL;
+
+ pipe_reference_init(&target->reference, 1);
+ pipe_resource_reference(&target->buffer, prsc);
+
+ target->context = pctx;
+ target->buffer_offset = buffer_offset;
+ target->buffer_size = buffer_size;
+
+ return target;
+}
+
+static void
+fd_stream_output_target_destroy(struct pipe_context *pctx,
+ struct pipe_stream_output_target *target)
+{
+ pipe_resource_reference(&target->buffer, NULL);
+ FREE(target);
+}
+
+static void
+fd_set_stream_output_targets(struct pipe_context *pctx,
+ unsigned num_targets, struct pipe_stream_output_target **targets,
+ const unsigned *offsets)
+{
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd_streamout_stateobj *so = &ctx->streamout;
+ unsigned i;
+
+ debug_assert(num_targets <= ARRAY_SIZE(so->targets));
+
+ for (i = 0; i < num_targets; i++) {
+ boolean changed = targets[i] != so->targets[i];
+ boolean append = (offsets[i] == (unsigned)-1);
+
+ if (!changed && append)
+ continue;
+
+ so->offsets[i] = 0;
+
+ pipe_so_target_reference(&so->targets[i], targets[i]);
+ }
+
+ for (; i < so->num_targets; i++) {
+ pipe_so_target_reference(&so->targets[i], NULL);
+ }
+
+ so->num_targets = num_targets;
+
+ ctx->dirty |= FD_DIRTY_STREAMOUT;
+}
+
void
fd_state_init(struct pipe_context *pctx)
{
@@ -328,4 +389,8 @@ fd_state_init(struct pipe_context *pctx)
pctx->create_vertex_elements_state = fd_vertex_state_create;
pctx->delete_vertex_elements_state = fd_vertex_state_delete;
pctx->bind_vertex_elements_state = fd_vertex_state_bind;
+
+ pctx->create_stream_output_target = fd_create_stream_output_target;
+ pctx->stream_output_target_destroy = fd_stream_output_target_destroy;
+ pctx->set_stream_output_targets = fd_set_stream_output_targets;
}
diff --git a/src/gallium/drivers/freedreno/freedreno_surface.c b/src/gallium/drivers/freedreno/freedreno_surface.c
index 250fe4bc0f5..70c44eb79c3 100644
--- a/src/gallium/drivers/freedreno/freedreno_surface.c
+++ b/src/gallium/drivers/freedreno/freedreno_surface.c
@@ -41,7 +41,8 @@ fd_create_surface(struct pipe_context *pctx,
// struct fd_resource* tex = fd_resource(ptex);
struct fd_surface* surface = CALLOC_STRUCT(fd_surface);
- assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
+ debug_assert(ptex->target != PIPE_BUFFER);
+ debug_assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
if (surface) {
struct pipe_surface *psurf = &surface->base;
diff --git a/src/gallium/drivers/freedreno/freedreno_surface.h b/src/gallium/drivers/freedreno/freedreno_surface.h
index 3293f33dd84..2de37cee2dd 100644
--- a/src/gallium/drivers/freedreno/freedreno_surface.h
+++ b/src/gallium/drivers/freedreno/freedreno_surface.h
@@ -40,7 +40,7 @@ struct fd_surface {
uint16_t depth;
};
-static INLINE struct fd_surface *
+static inline struct fd_surface *
fd_surface(struct pipe_surface *psurf)
{
return (struct fd_surface *)psurf;
diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h
index deb0e602ce2..7129a1bddd1 100644
--- a/src/gallium/drivers/freedreno/freedreno_util.h
+++ b/src/gallium/drivers/freedreno/freedreno_util.h
@@ -40,6 +40,7 @@
#include "util/u_dynarray.h"
#include "util/u_pack_color.h"
+#include "disasm.h"
#include "adreno_common.xml.h"
#include "adreno_pm4.xml.h"
@@ -53,6 +54,12 @@ enum adreno_stencil_op fd_stencil_op(unsigned op);
/* TBD if it is same on a2xx, but for now: */
#define MAX_MIP_LEVELS A3XX_MAX_MIP_LEVELS
+#define A2XX_MAX_RENDER_TARGETS 1
+#define A3XX_MAX_RENDER_TARGETS 4
+#define A4XX_MAX_RENDER_TARGETS 8
+
+#define MAX_RENDER_TARGETS A4XX_MAX_RENDER_TARGETS
+
#define FD_DBG_MSGS 0x0001
#define FD_DBG_DISASM 0x0002
#define FD_DBG_DCLEAR 0x0004
@@ -64,6 +71,7 @@ enum adreno_stencil_op fd_stencil_op(unsigned op);
#define FD_DBG_NOBIN 0x0100
#define FD_DBG_OPTMSGS 0x0200
#define FD_DBG_GLSL120 0x0400
+#define FD_DBG_SHADERDB 0x0800
extern int fd_mesa_debug;
extern bool fd_binning_enabled;
@@ -108,6 +116,58 @@ pipe_surface_format(struct pipe_surface *psurf)
return psurf->format;
}
+static inline bool
+fd_surface_half_precision(const struct pipe_surface *psurf)
+{
+ enum pipe_format format;
+
+ if (!psurf)
+ return true;
+
+ format = psurf->format;
+
+ /* colors are provided in consts, which go through cov.f32f16 and
+ * would break these (pure-integer) values
+ */
+ if (util_format_is_pure_integer(format))
+ return false;
+
+ /* avoid losing precision on 32-bit float formats */
+ if (util_format_is_float(format) &&
+ util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0) == 32)
+ return false;
+
+ return true;
+}
+
+static inline unsigned
+fd_sampler_first_level(const struct pipe_sampler_view *view)
+{
+ if (view->target == PIPE_BUFFER)
+ return 0;
+ return view->u.tex.first_level;
+}
+
+static inline unsigned
+fd_sampler_last_level(const struct pipe_sampler_view *view)
+{
+ if (view->target == PIPE_BUFFER)
+ return 0;
+ return view->u.tex.last_level;
+}
+
+static inline bool
+fd_half_precision(struct pipe_framebuffer_state *pfb)
+{
+ unsigned i;
+
+ for (i = 0; i < pfb->nr_cbufs; i++)
+ if (!fd_surface_half_precision(pfb->cbufs[i]))
+ return false;
+
+ return true;
+}
+
#define LOG_DWORDS 0
static inline void emit_marker(struct fd_ringbuffer *ring, int scratch_idx);
diff --git a/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c b/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c
index 48ae7c71b9f..83ed5ffdca0 100644
--- a/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c
+++ b/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c
@@ -103,7 +103,7 @@ static void print_reg(reg_t reg, bool full, bool r, bool c, bool im,
} else if ((reg.num == REG_P0) && !c) {
printf("p0.%c", component[reg.comp]);
} else {
- printf("%s%c%d.%c", full ? "" : "h", type, reg.num, component[reg.comp]);
+ printf("%s%c%d.%c", full ? "" : "h", type, reg.num & 0x3f, component[reg.comp]);
}
}
@@ -122,6 +122,32 @@ static void print_reg_src(reg_t reg, bool full, bool r, bool c, bool im,
print_reg(reg, full, r, c, im, neg, abs, addr_rel);
}
+/* TODO switch to using reginfo struct everywhere, since more readable
+ * than passing a bunch of bools to print_reg_src
+ */
+
+struct reginfo {
+ reg_t reg;
+ bool full;
+ bool r;
+ bool c;
+ bool im;
+ bool neg;
+ bool abs;
+ bool addr_rel;
+};
+
+static void print_src(struct reginfo *info)
+{
+ print_reg_src(info->reg, info->full, info->r, info->c, info->im,
+ info->neg, info->abs, info->addr_rel);
+}
+
+//static void print_dst(struct reginfo *info)
+//{
+// print_reg_dst(info->reg, info->full, info->addr_rel);
+//}
+
static void print_instr_cat0(instr_t *instr)
{
instr_cat0_t *cat0 = &instr->cat0;
@@ -454,10 +480,70 @@ static void print_instr_cat6(instr_t *instr)
{
instr_cat6_t *cat6 = &instr->cat6;
char sd = 0, ss = 0; /* dst/src address space */
- bool full = type_size(cat6->type) == 32;
bool nodst = false;
+ struct reginfo dst, src1, src2;
+ int src1off = 0, dstoff = 0;
- printf(".%s ", type[cat6->type]);
+ memset(&dst, 0, sizeof(dst));
+ memset(&src1, 0, sizeof(src1));
+ memset(&src2, 0, sizeof(src2));
+
+ switch (cat6->opc) {
+ case OPC_RESINFO:
+ case OPC_RESFMT:
+ dst.full = type_size(cat6->type) == 32;
+ src1.full = type_size(cat6->type) == 32;
+ src2.full = type_size(cat6->type) == 32;
+ break;
+ case OPC_L2G:
+ case OPC_G2L:
+ dst.full = true;
+ src1.full = true;
+ src2.full = true;
+ break;
+ case OPC_STG:
+ case OPC_STL:
+ case OPC_STP:
+ case OPC_STI:
+ case OPC_STLW:
+ case OPC_STGB_4D_4:
+ case OPC_STIB:
+ dst.full = true;
+ src1.full = type_size(cat6->type) == 32;
+ src2.full = type_size(cat6->type) == 32;
+ break;
+ default:
+ dst.full = type_size(cat6->type) == 32;
+ src1.full = true;
+ src2.full = true;
+ break;
+ }
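/* Summary of the width selection above, inferred from the switch rather
 * than from documentation: for stores the dst is an address and always
 * full, while the source data follows the instruction's type size; for
 * most other (load) opcodes it is the reverse, with resinfo/resfmt and
 * l2g/g2l handled as special cases.
 */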
+
+ switch (cat6->opc) {
+ case OPC_PREFETCH:
+ case OPC_RESINFO:
+ break;
+ case OPC_ATOMIC_ADD:
+ case OPC_ATOMIC_SUB:
+ case OPC_ATOMIC_XCHG:
+ case OPC_ATOMIC_INC:
+ case OPC_ATOMIC_DEC:
+ case OPC_ATOMIC_CMPXCHG:
+ case OPC_ATOMIC_MIN:
+ case OPC_ATOMIC_MAX:
+ case OPC_ATOMIC_AND:
+ case OPC_ATOMIC_OR:
+ case OPC_ATOMIC_XOR:
+ ss = cat6->g ? 'g' : 'l';
+ printf(".%c", ss);
+ printf(".%s", type[cat6->type]);
+ break;
+ default:
+ dst.im = cat6->g && !cat6->dst_off;
+ printf(".%s", type[cat6->type]);
+ break;
+ }
+ printf(" ");
switch (cat6->opc) {
case OPC_STG:
@@ -499,68 +585,65 @@ static void print_instr_cat6(instr_t *instr)
break;
case OPC_STI:
- full = false; // XXX or inverts??
+ dst.full = false; // XXX or inverts??
break;
}
- if (cat6->has_off) {
- if (!nodst) {
- if (sd)
- printf("%c[", sd);
- print_reg_dst((reg_t)(cat6->a.dst), full, false);
- if (sd)
- printf("]");
- printf(", ");
- }
- if (ss)
- printf("%c[", ss);
- print_reg_src((reg_t)(cat6->a.src1), true,
- false, false, cat6->a.src1_im, false, false, false);
- if (cat6->a.off)
- printf("%+d", cat6->a.off);
- if (ss)
- printf("]");
- printf(", ");
- print_reg_src((reg_t)(cat6->a.src2), full,
- false, false, cat6->a.src2_im, false, false, false);
+ if (cat6->dst_off) {
+ dst.reg = (reg_t)(cat6->c.dst);
+ dstoff = cat6->c.off;
} else {
- if (!nodst) {
- if (sd)
- printf("%c[", sd);
- print_reg_dst((reg_t)(cat6->b.dst), full, false);
- if (sd)
- printf("]");
- printf(", ");
- }
- if (ss)
- printf("%c[", ss);
- print_reg_src((reg_t)(cat6->b.src1), true,
- false, false, cat6->b.src1_im, false, false, false);
- if (ss)
+ dst.reg = (reg_t)(cat6->d.dst);
+ }
+
+ if (cat6->src_off) {
+ src1.reg = (reg_t)(cat6->a.src1);
+ src1.im = cat6->a.src1_im;
+ src2.reg = (reg_t)(cat6->a.src2);
+ src2.im = cat6->a.src2_im;
+ src1off = cat6->a.off;
+ } else {
+ src1.reg = (reg_t)(cat6->b.src1);
+ src1.im = cat6->b.src1_im;
+ src2.reg = (reg_t)(cat6->b.src2);
+ src2.im = cat6->b.src2_im;
+ }
+
+ if (!nodst) {
+ if (sd)
+ printf("%c[", sd);
+ /* note: dst might actually be a src (ie. address to store to) */
+ print_src(&dst);
+ if (dstoff)
+ printf("%+d", dstoff);
+ if (sd)
printf("]");
printf(", ");
- print_reg_src((reg_t)(cat6->b.src2), full,
- false, false, cat6->b.src2_im, false, false, false);
}
- if (debug & PRINT_VERBOSE) {
- switch (cat6->opc) {
- case OPC_LDG:
- case OPC_LDP:
- /* load instructions: */
- if (cat6->a.dummy2|cat6->a.dummy3)
- printf("\t{6: %x,%x}", cat6->a.dummy2, cat6->a.dummy3);
- break;
- case OPC_STG:
- case OPC_STP:
- case OPC_STI:
- /* store instructions: */
- if (cat6->b.dummy2|cat6->b.dummy2)
- printf("\t{6: %x,%x}", cat6->b.dummy2, cat6->b.dummy3);
- if (cat6->b.ignore0)
- printf("\t{?? %x}", cat6->b.ignore0);
- break;
- }
+ if (ss)
+ printf("%c[", ss);
+
+ /* can have a larger than normal immed, so hack: */
+ if (src1.im) {
+ printf("%u", src1.reg.dummy13);
+ } else {
+ print_src(&src1);
+ }
+
+ if (src1off)
+ printf("%+d", src1off);
+ if (ss)
+ printf("]");
+
+ switch (cat6->opc) {
+ case OPC_RESINFO:
+ case OPC_RESFMT:
+ break;
+ default:
+ printf(", ");
+ print_src(&src2);
+ break;
}
}
@@ -711,19 +794,19 @@ struct opc_info {
OPC(6, OPC_LDLW, ldlw),
OPC(6, OPC_STLW, stlw),
OPC(6, OPC_RESFMT, resfmt),
- OPC(6, OPC_RESINFO, resinf),
- OPC(6, OPC_ATOMIC_ADD_L, atomic.add.l),
- OPC(6, OPC_ATOMIC_SUB_L, atomic.sub.l),
- OPC(6, OPC_ATOMIC_XCHG_L, atomic.xchg.l),
- OPC(6, OPC_ATOMIC_INC_L, atomic.inc.l),
- OPC(6, OPC_ATOMIC_DEC_L, atomic.dec.l),
- OPC(6, OPC_ATOMIC_CMPXCHG_L, atomic.cmpxchg.l),
- OPC(6, OPC_ATOMIC_MIN_L, atomic.min.l),
- OPC(6, OPC_ATOMIC_MAX_L, atomic.max.l),
- OPC(6, OPC_ATOMIC_AND_L, atomic.and.l),
- OPC(6, OPC_ATOMIC_OR_L, atomic.or.l),
- OPC(6, OPC_ATOMIC_XOR_L, atomic.xor.l),
- OPC(6, OPC_LDGB_TYPED_4D, ldgb.typed.4d),
+ OPC(6, OPC_RESINFO, resinfo),
+ OPC(6, OPC_ATOMIC_ADD, atomic.add),
+ OPC(6, OPC_ATOMIC_SUB, atomic.sub),
+ OPC(6, OPC_ATOMIC_XCHG, atomic.xchg),
+ OPC(6, OPC_ATOMIC_INC, atomic.inc),
+ OPC(6, OPC_ATOMIC_DEC, atomic.dec),
+ OPC(6, OPC_ATOMIC_CMPXCHG, atomic.cmpxchg),
+ OPC(6, OPC_ATOMIC_MIN, atomic.min),
+ OPC(6, OPC_ATOMIC_MAX, atomic.max),
+ OPC(6, OPC_ATOMIC_AND, atomic.and),
+ OPC(6, OPC_ATOMIC_OR, atomic.or),
+ OPC(6, OPC_ATOMIC_XOR, atomic.xor),
+ OPC(6, OPC_LDGB_TYPED_4D, ldgb.typed.3d),
OPC(6, OPC_STGB_4D_4, stgb.4d.4),
OPC(6, OPC_STIB, stib),
OPC(6, OPC_LDC_4, ldc.4),
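
The reginfo struct added above bundles print_reg_src()'s seven flag arguments into one zero-initializable value. A minimal sketch of the intended call pattern, mirroring how print_instr_cat6() fills it in (field values here are illustrative):

	struct reginfo src1;

	memset(&src1, 0, sizeof(src1));           /* all flags default to false */
	src1.reg  = (reg_t)(cat6->b.src1);        /* the !src_off encoding */
	src1.full = type_size(cat6->type) == 32;  /* full regs print as rN, half as hrN */
	src1.im   = cat6->b.src1_im;              /* src is an immediate, not a gpr */
	print_src(&src1);                         /* wraps print_reg_src() */
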
diff --git a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h
index efb07ea479e..c3fb68d511c 100644
--- a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h
+++ b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h
@@ -173,17 +173,17 @@ typedef enum {
OPC_STLW = 11,
OPC_RESFMT = 14,
OPC_RESINFO = 15,
- OPC_ATOMIC_ADD_L = 16,
- OPC_ATOMIC_SUB_L = 17,
- OPC_ATOMIC_XCHG_L = 18,
- OPC_ATOMIC_INC_L = 19,
- OPC_ATOMIC_DEC_L = 20,
- OPC_ATOMIC_CMPXCHG_L = 21,
- OPC_ATOMIC_MIN_L = 22,
- OPC_ATOMIC_MAX_L = 23,
- OPC_ATOMIC_AND_L = 24,
- OPC_ATOMIC_OR_L = 25,
- OPC_ATOMIC_XOR_L = 26,
+ OPC_ATOMIC_ADD = 16,
+ OPC_ATOMIC_SUB = 17,
+ OPC_ATOMIC_XCHG = 18,
+ OPC_ATOMIC_INC = 19,
+ OPC_ATOMIC_DEC = 20,
+ OPC_ATOMIC_CMPXCHG = 21,
+ OPC_ATOMIC_MIN = 22,
+ OPC_ATOMIC_MAX = 23,
+ OPC_ATOMIC_AND = 24,
+ OPC_ATOMIC_OR = 25,
+ OPC_ATOMIC_XOR = 26,
OPC_LDGB_TYPED_4D = 27,
OPC_STGB_4D_4 = 28,
OPC_STIB = 29,
@@ -575,7 +575,7 @@ typedef struct PACKED {
uint32_t opc_cat : 3;
} instr_cat5_t;
-/* [src1 + off], src2: */
+/* dword0 encoding for src_off: [src1 + off], src2: */
typedef struct PACKED {
/* dword0: */
uint32_t mustbe1 : 1;
@@ -586,37 +586,50 @@ typedef struct PACKED {
uint32_t src2 : 8;
/* dword1: */
- uint32_t dst : 8;
- uint32_t dummy2 : 9;
- uint32_t type : 3;
- uint32_t dummy3 : 2;
- uint32_t opc : 5;
- uint32_t jmp_tgt : 1;
- uint32_t sync : 1;
- uint32_t opc_cat : 3;
+ uint32_t dword1;
} instr_cat6a_t;
-/* [src1], src2: */
+/* dword0 encoding for !src_off: [src1], src2 */
typedef struct PACKED {
/* dword0: */
uint32_t mustbe0 : 1;
- uint32_t src1 : 8;
- uint32_t ignore0 : 13;
+ uint32_t src1 : 13;
+ uint32_t ignore0 : 8;
uint32_t src1_im : 1;
uint32_t src2_im : 1;
uint32_t src2 : 8;
/* dword1: */
- uint32_t dst : 8;
- uint32_t dummy2 : 9;
- uint32_t type : 3;
- uint32_t dummy3 : 2;
- uint32_t opc : 5;
- uint32_t jmp_tgt : 1;
- uint32_t sync : 1;
- uint32_t opc_cat : 3;
+ uint32_t dword1;
} instr_cat6b_t;
+/* dword1 encoding for dst_off: */
+typedef struct PACKED {
+ /* dword0: */
+ uint32_t dword0;
+
+ /* note: there is some weird stuff going on where sometimes
+ * cat6->a.off is involved.. but that seems like a bug in
+ * the blob, since it is used even when !cat6->src_off.
+ * It would make sense for there to be some more bits to
+ * bring us to 11 bits worth of offset, but not sure..
+ */
+ int32_t off : 8;
+ uint32_t mustbe1 : 1;
+ uint32_t dst : 8;
+ uint32_t pad1 : 15;
+} instr_cat6c_t;
+
+/* dword1 encoding for !dst_off: */
+typedef struct PACKED {
+ /* dword0: */
+ uint32_t dword0;
+
+ uint32_t dst : 8;
+ uint32_t mustbe0 : 1;
+ uint32_t pad0 : 23;
+} instr_cat6d_t;
+
/* I think some of the other cat6 instructions use additional
* sub-encodings..
*/
@@ -624,16 +637,20 @@ typedef struct PACKED {
typedef union PACKED {
instr_cat6a_t a;
instr_cat6b_t b;
+ instr_cat6c_t c;
+ instr_cat6d_t d;
struct PACKED {
/* dword0: */
- uint32_t has_off : 1;
+ uint32_t src_off : 1;
uint32_t pad1 : 31;
/* dword1: */
- uint32_t dst : 8;
- uint32_t dummy2 : 9;
+ uint32_t pad2 : 8;
+ uint32_t dst_off : 1;
+ uint32_t pad3 : 8;
uint32_t type : 3;
- uint32_t dummy3 : 2;
+ uint32_t g : 1; /* or in some cases it means dst immed */
+ uint32_t pad4 : 1;
uint32_t opc : 5;
uint32_t jmp_tgt : 1;
uint32_t sync : 1;
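
Net effect of the encoding rework: dword0 of a cat6 instruction is decoded through cat6a (src_off=1, which spends bits on a signed src1 offset) or cat6b (src_off=0, which gives src1 the full 13 bits), and dword1 through cat6c (dst_off=1) or cat6d (dst_off=0). A hedged decode sketch showing how the union selects sub-encodings (helper name hypothetical, not part of this patch):

	/* hypothetical helper: route the shared src_off/dst_off bits to the
	 * matching sub-encoding of the cat6 union:
	 */
	static void cat6_get_regs(instr_cat6_t *cat6, uint32_t *src1,
			int32_t *src1off, uint32_t *dst, int32_t *dstoff)
	{
		if (cat6->src_off) {            /* [src1 + off] form */
			*src1    = cat6->a.src1;
			*src1off = cat6->a.off;
		} else {
			*src1    = cat6->b.src1;
			*src1off = 0;
		}
		if (cat6->dst_off) {            /* [dst + off] form */
			*dst    = cat6->c.dst;
			*dstoff = cat6->c.off;
		} else {
			*dst    = cat6->d.dst;
			*dstoff = 0;
		}
	}
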
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c
index a166b67d7cf..b24825cff85 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3.c
@@ -499,32 +499,51 @@ static int emit_cat5(struct ir3_instruction *instr, void *ptr,
static int emit_cat6(struct ir3_instruction *instr, void *ptr,
struct ir3_info *info)
{
- struct ir3_register *dst = instr->regs[0];
- struct ir3_register *src1 = instr->regs[1];
- struct ir3_register *src2 = (instr->regs_count >= 3) ? instr->regs[2] : NULL;
+ struct ir3_register *dst, *src1, *src2;
instr_cat6_t *cat6 = ptr;
- iassert(instr->regs_count >= 2);
+ /* the "dst" for a store instruction is (from the perspective
+ * of data flow in the shader, ie. register use/def, etc) in
+ * fact a register that is read by the instruction, rather
+ * than written:
+ */
+ if (is_store(instr)) {
+ iassert(instr->regs_count >= 3);
- if (instr->cat6.offset || instr->opc == OPC_LDG) {
+ dst = instr->regs[1];
+ src1 = instr->regs[2];
+ src2 = (instr->regs_count >= 4) ? instr->regs[3] : NULL;
+ } else {
+ iassert(instr->regs_count >= 2);
+
+ dst = instr->regs[0];
+ src1 = instr->regs[1];
+ src2 = (instr->regs_count >= 3) ? instr->regs[2] : NULL;
+ }
+
+ /* TODO we need a more comprehensive list of which instructions
+ * can be encoded which way. Or possibly use the IR3_INSTR_0 flag to
+ * indicate use of the src_off encoding even when the offset is zero
+ * (but then what to do about dst_off?)
+ */
+ if (instr->cat6.src_offset || (instr->opc == OPC_LDG)) {
instr_cat6a_t *cat6a = ptr;
- cat6->has_off = true;
+ cat6->src_off = true;
- cat6a->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
cat6a->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED);
cat6a->src1_im = !!(src1->flags & IR3_REG_IMMED);
if (src2) {
cat6a->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
cat6a->src2_im = !!(src2->flags & IR3_REG_IMMED);
}
- cat6a->off = instr->cat6.offset;
+ cat6a->off = instr->cat6.src_offset;
} else {
instr_cat6b_t *cat6b = ptr;
- cat6->has_off = false;
+ cat6->src_off = false;
- cat6b->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
cat6b->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED);
cat6b->src1_im = !!(src1->flags & IR3_REG_IMMED);
if (src2) {
@@ -533,10 +552,22 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr,
}
}
+ if (instr->cat6.dst_offset || (instr->opc == OPC_STG)) {
+ instr_cat6c_t *cat6c = ptr;
+ cat6->dst_off = true;
+ cat6c->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
+ cat6c->off = instr->cat6.dst_offset;
+ } else {
+ instr_cat6d_t *cat6d = ptr;
+ cat6->dst_off = false;
+ cat6d->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
+ }
+
cat6->type = instr->cat6.type;
cat6->opc = instr->opc;
cat6->jmp_tgt = !!(instr->flags & IR3_INSTR_JP);
cat6->sync = !!(instr->flags & IR3_INSTR_SY);
+ cat6->g = !!(instr->flags & IR3_INSTR_G);
cat6->opc_cat = 6;
return 0;
@@ -669,7 +700,6 @@ struct ir3_instruction * ir3_instr_create(struct ir3_block *block,
return ir3_instr_create2(block, category, opc, 4);
}
-/* only used by old compiler: */
struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr)
{
struct ir3_instruction *new_instr = instr_create(instr->block,
@@ -708,6 +738,17 @@ struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
}
void
+ir3_instr_set_address(struct ir3_instruction *instr,
+ struct ir3_instruction *addr)
+{
+ if (instr->address != addr) {
+ struct ir3 *ir = instr->block->shader;
+ instr->address = addr;
+ array_insert(ir->indirects, instr);
+ }
+}
+
+void
ir3_block_clear_mark(struct ir3_block *block)
{
list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node)
@@ -723,15 +764,16 @@ ir3_clear_mark(struct ir3 *ir)
}
/* note: this will destroy instr->depth, don't do it until after sched! */
-void
+unsigned
ir3_count_instructions(struct ir3 *ir)
{
- unsigned ip = 0;
+ unsigned cnt = 0;
list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
- instr->ip = ip++;
+ instr->ip = cnt++;
}
block->start_ip = list_first_entry(&block->instr_list, struct ir3_instruction, node)->ip;
block->end_ip = list_last_entry(&block->instr_list, struct ir3_instruction, node)->ip;
}
+ return cnt;
}
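
ir3_instr_set_address() is now the one place where an address-register dependency gets recorded: it sets instr->address and appends the instruction to ir->indirects, which the cp, depth, and sched passes below all iterate. array_insert() is the usual grow-on-demand helper from ir3.h, roughly of this shape (a sketch following the header's _count/_sz convention, not part of this diff):

	#define array_insert(arr, val) do { \
			if (arr ## _count == arr ## _sz) { \
				arr ## _sz = MAX2(2 * arr ## _sz, 16); \
				arr = realloc(arr, arr ## _sz * sizeof(arr[0])); \
			} \
			arr[arr ## _count++] = val; \
		} while (0)
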
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h
index 9c35a763d58..12f2ebe18db 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -172,6 +172,7 @@ struct ir3_instruction {
IR3_INSTR_P = 0x080,
IR3_INSTR_S = 0x100,
IR3_INSTR_S2EN = 0x200,
+ IR3_INSTR_G = 0x400,
/* meta-flags, for intermediate stages of IR, ie.
* before register assignment is done:
*/
@@ -209,7 +210,8 @@ struct ir3_instruction {
} cat5;
struct {
type_t type;
- int offset;
+ int src_offset;
+ int dst_offset;
int iim_val;
} cat6;
/* for meta-instructions, just used to hold extra data
@@ -285,6 +287,8 @@ struct ir3_instruction {
/* an instruction can reference at most one address register amongst
* its src/dst registers. Beyond that, you need to insert mov's.
+ *
+ * NOTE: do not write this directly, use ir3_instr_set_address()
*/
struct ir3_instruction *address;
@@ -365,6 +369,12 @@ struct ir3 {
unsigned predicates_count, predicates_sz;
struct ir3_instruction **predicates;
+ /* Track instructions which do not write a register but other-
+ * wise must not be discarded (such as kill, stg, etc)
+ */
+ unsigned keeps_count, keeps_sz;
+ struct ir3_instruction **keeps;
+
/* List of blocks: */
struct list_head block_list;
@@ -420,6 +430,9 @@ const char *ir3_instr_name(struct ir3_instruction *instr);
struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
int num, int flags);
+void ir3_instr_set_address(struct ir3_instruction *instr,
+ struct ir3_instruction *addr);
+
static inline bool ir3_instr_check_mark(struct ir3_instruction *instr)
{
if (instr->flags & IR3_INSTR_MARK)
@@ -431,7 +444,7 @@ static inline bool ir3_instr_check_mark(struct ir3_instruction *instr)
void ir3_block_clear_mark(struct ir3_block *block);
void ir3_clear_mark(struct ir3 *shader);
-void ir3_count_instructions(struct ir3 *ir);
+unsigned ir3_count_instructions(struct ir3 *ir);
static inline int ir3_instr_regno(struct ir3_instruction *instr,
struct ir3_register *reg)
@@ -547,6 +560,26 @@ is_store(struct ir3_instruction *instr)
return false;
}
+static inline bool is_load(struct ir3_instruction *instr)
+{
+ if (is_mem(instr)) {
+ switch (instr->opc) {
+ case OPC_LDG:
+ case OPC_LDL:
+ case OPC_LDP:
+ case OPC_L2G:
+ case OPC_LDLW:
+ case OPC_LDC_4:
+ case OPC_LDLV:
+ /* probably some others too.. */
+ return true;
+ default:
+ break;
+ }
+ }
+ return false;
+}
+
static inline bool is_input(struct ir3_instruction *instr)
{
/* in some cases, ldlv is used to fetch varying without
@@ -1036,6 +1069,7 @@ ir3_SAM(struct ir3_block *block, opc_t opc, type_t type,
/* cat6 instructions: */
INSTR2(6, LDLV)
INSTR2(6, LDG)
+INSTR3(6, STG)
/* ************************************************************************* */
/* split this out or find some helper to use.. like main/bitset.h.. */
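
The new keeps[] array exists because dead-code elimination (ir3_depth.c) only walks backwards from ir->outputs[]: a store like stg writes memory rather than a register, so unless it is rooted somewhere it would be silently dropped. The pattern for any side-effecting instruction is the one used for kill and for the stream-out stores below:

	/* stg has no register dst, so root it in ir->keeps so that
	 * depth/sched/cp treat it (and, transitively, its srcs) as live:
	 */
	stg = ir3_STG(block, base, 0, out, 0, create_immed(block, 1), 0);
	array_insert(ir->keeps, stg);
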
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
index ad9d2719d59..ede29f445dc 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
@@ -43,127 +43,15 @@
#include "instr-a3xx.h"
#include "ir3.h"
-static void dump_reg(const char *name, uint32_t r)
-{
- if (r != regid(63,0))
- debug_printf("; %s: r%d.%c\n", name, r >> 2, "xyzw"[r & 0x3]);
-}
-
-static void dump_semantic(struct ir3_shader_variant *so,
- unsigned sem, const char *name)
-{
- uint32_t regid;
- regid = ir3_find_output_regid(so, ir3_semantic_name(sem, 0));
- dump_reg(name, regid);
-}
-
static void dump_info(struct ir3_shader_variant *so, const char *str)
{
uint32_t *bin;
- const char *type = (so->type == SHADER_VERTEX) ? "VERT" : "FRAG";
-
- // for debug, dump some before/after info:
+ const char *type = ir3_shader_stage(so->shader);
// TODO make gpu_id configurable on cmdline
bin = ir3_shader_assemble(so, 320);
- if (fd_mesa_debug & FD_DBG_DISASM) {
- struct ir3 *ir = so->ir;
- struct ir3_register *reg;
- uint8_t regid;
- unsigned i;
-
- debug_printf("; %s: %s\n", type, str);
-
- for (i = 0; i < ir->ninputs; i++) {
- if (!ir->inputs[i]) {
- debug_printf("; in%d unused\n", i);
- continue;
- }
- reg = ir->inputs[i]->regs[0];
- regid = reg->num;
- debug_printf("@in(%sr%d.%c)\tin%d\n",
- (reg->flags & IR3_REG_HALF) ? "h" : "",
- (regid >> 2), "xyzw"[regid & 0x3], i);
- }
-
- for (i = 0; i < ir->noutputs; i++) {
- if (!ir->outputs[i]) {
- debug_printf("; out%d unused\n", i);
- continue;
- }
- /* kill shows up as a virtual output.. skip it! */
- if (is_kill(ir->outputs[i]))
- continue;
- reg = ir->outputs[i]->regs[0];
- regid = reg->num;
- debug_printf("@out(%sr%d.%c)\tout%d\n",
- (reg->flags & IR3_REG_HALF) ? "h" : "",
- (regid >> 2), "xyzw"[regid & 0x3], i);
- }
-
- for (i = 0; i < so->immediates_count; i++) {
- debug_printf("@const(c%d.x)\t", so->first_immediate + i);
- debug_printf("0x%08x, 0x%08x, 0x%08x, 0x%08x\n",
- so->immediates[i].val[0],
- so->immediates[i].val[1],
- so->immediates[i].val[2],
- so->immediates[i].val[3]);
- }
-
- disasm_a3xx(bin, so->info.sizedwords, 0, so->type);
-
- debug_printf("; %s: outputs:", type);
- for (i = 0; i < so->outputs_count; i++) {
- uint8_t regid = so->outputs[i].regid;
- ir3_semantic sem = so->outputs[i].semantic;
- debug_printf(" r%d.%c (%u:%u)",
- (regid >> 2), "xyzw"[regid & 0x3],
- sem2name(sem), sem2idx(sem));
- }
- debug_printf("\n");
- debug_printf("; %s: inputs:", type);
- for (i = 0; i < so->inputs_count; i++) {
- uint8_t regid = so->inputs[i].regid;
- ir3_semantic sem = so->inputs[i].semantic;
- debug_printf(" r%d.%c (%u:%u,cm=%x,il=%u,b=%u)",
- (regid >> 2), "xyzw"[regid & 0x3],
- sem2name(sem), sem2idx(sem),
- so->inputs[i].compmask,
- so->inputs[i].inloc,
- so->inputs[i].bary);
- }
- debug_printf("\n");
- }
-
- /* print generic shader info: */
- debug_printf("; %s: %u instructions, %d half, %d full\n", type,
- so->info.instrs_count,
- so->info.max_half_reg + 1,
- so->info.max_reg + 1);
-
- /* print shader type specific info: */
- switch (so->type) {
- case SHADER_VERTEX:
- dump_semantic(so, TGSI_SEMANTIC_POSITION, "pos");
- dump_semantic(so, TGSI_SEMANTIC_PSIZE, "psize");
- break;
- case SHADER_FRAGMENT:
- dump_reg("pos (bary)", so->pos_regid);
- dump_semantic(so, TGSI_SEMANTIC_POSITION, "posz");
- dump_semantic(so, TGSI_SEMANTIC_COLOR, "color");
- /* these two are hard-coded since we don't know how to
- * program them to anything but all 0's...
- */
- if (so->frag_coord)
- debug_printf("; fragcoord: r0.x\n");
- if (so->frag_face)
- debug_printf("; fragface: hr0.x\n");
- break;
- case SHADER_COMPUTE:
- break;
- }
+ debug_printf("; %s: %s\n", type, str);
+ ir3_shader_disasm(so, bin);
free(bin);
-
- debug_printf("\n");
}
@@ -205,8 +93,7 @@ static void print_usage(void)
printf(" --saturate-s MASK - bitmask of samplers to saturate S coord\n");
printf(" --saturate-t MASK - bitmask of samplers to saturate T coord\n");
printf(" --saturate-r MASK - bitmask of samplers to saturate R coord\n");
- printf(" --nocp - disable copy propagation\n");
- printf(" --nir - use NIR compiler\n");
+ printf(" --stream-out - enable stream-out (aka transform feedback)\n");
printf(" --help - show this message\n");
}
@@ -218,6 +105,7 @@ int main(int argc, char **argv)
struct tgsi_parse_context parse;
struct ir3_compiler *compiler;
struct ir3_shader_variant v;
+ struct ir3_shader s;
struct ir3_shader_key key = {};
const char *info;
void *ptr;
@@ -225,6 +113,9 @@ int main(int argc, char **argv)
fd_mesa_debug |= FD_DBG_DISASM;
+ memset(&s, 0, sizeof(s));
+ memset(&v, 0, sizeof(v));
+
/* cmdline args which impact shader variant get spit out in a
* comment on the first line.. a quick/dirty way to preserve
* that info so when ir3test recompiles the shader with a new
@@ -281,6 +172,24 @@ int main(int argc, char **argv)
continue;
}
+ if (!strcmp(argv[n], "--stream-out")) {
+ struct pipe_stream_output_info *so = &s.stream_output;
+ debug_printf(" %s", argv[n]);
+ /* TODO more dynamic config based on number of outputs, etc
+ * rather than just hard-code for first output:
+ */
+ so->num_outputs = 1;
+ so->stride[0] = 4;
+ so->output[0].register_index = 0;
+ so->output[0].start_component = 0;
+ so->output[0].num_components = 4;
+ so->output[0].output_buffer = 0;
+ so->output[0].dst_offset = 2;
+ so->output[0].stream = 0;
+ n++;
+ continue;
+ }
+
if (!strcmp(argv[n], "--help")) {
print_usage();
return 0;
@@ -292,9 +201,6 @@ int main(int argc, char **argv)
filename = argv[n];
- memset(&v, 0, sizeof(v));
- v.key = key;
-
ret = read_file(filename, &ptr, &size);
if (ret) {
print_usage();
@@ -307,16 +213,21 @@ int main(int argc, char **argv)
if (!tgsi_text_translate(ptr, toks, Elements(toks)))
errx(1, "could not parse `%s'", filename);
+ s.tokens = toks;
+
+ v.key = key;
+ v.shader = &s;
+
tgsi_parse_init(&parse, toks);
switch (parse.FullHeader.Processor.Processor) {
case TGSI_PROCESSOR_FRAGMENT:
- v.type = SHADER_FRAGMENT;
+ s.type = v.type = SHADER_FRAGMENT;
break;
case TGSI_PROCESSOR_VERTEX:
- v.type = SHADER_VERTEX;
+ s.type = v.type = SHADER_VERTEX;
break;
case TGSI_PROCESSOR_COMPUTE:
- v.type = SHADER_COMPUTE;
+ s.type = v.type = SHADER_COMPUTE;
break;
}
@@ -324,7 +235,7 @@ int main(int argc, char **argv)
compiler = ir3_compiler_create(320);
info = "NIR compiler";
- ret = ir3_compile_shader_nir(compiler, &v, toks, key);
+ ret = ir3_compile_shader_nir(compiler, &v);
if (ret) {
fprintf(stderr, "compiler failed!\n");
return ret;
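
The --stream-out flag hard-codes a single vec4 output (register_index 0, buffer 0, dst_offset 2), just enough to exercise the emit_stream_out() path from the standalone compiler. A hedged invocation (binary and file names illustrative):

	./ir3_compiler --stream-out myshader.vert.tgsi
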
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.h b/src/gallium/drivers/freedreno/ir3/ir3_compiler.h
index 86b1161d9cb..697afeba61a 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.h
@@ -36,14 +36,13 @@ struct ir3_ra_reg_set;
struct ir3_compiler {
uint32_t gpu_id;
struct ir3_ra_reg_set *set;
+ uint32_t shader_count;
};
struct ir3_compiler * ir3_compiler_create(uint32_t gpu_id);
void ir3_compiler_destroy(struct ir3_compiler *compiler);
int ir3_compile_shader_nir(struct ir3_compiler *compiler,
- struct ir3_shader_variant *so,
- const struct tgsi_token *tokens,
- struct ir3_shader_key key);
+ struct ir3_shader_variant *so);
#endif /* IR3_COMPILER_H_ */
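
The new shader_count field gives each ir3_shader a per-compiler id; together with the per-shader variant_count bump in create_variant(), it produces the prog %d/%d pair in the SHADER-DB output added to ir3_shader.c. Presumably it is assigned at shader-creation time, along the lines of (a sketch, not shown in this diff):

	shader->id = ++compiler->shader_count;   /* first id is 1, matching v->id */
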
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 48b1d8f3606..0ab33455ed1 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -117,10 +117,6 @@ struct ir3_compile {
/* for looking up which system value is which */
unsigned sysval_semantics[8];
- /* list of kill instructions: */
- struct ir3_instruction *kill[16];
- unsigned int kill_count;
-
/* set if we encounter something we can't handle yet, so we
* can bail cleanly and fallback to TGSI compiler f/e
*/
@@ -153,6 +149,7 @@ static struct nir_shader *to_nir(const struct tgsi_token *tokens)
nir_opt_global_to_local(s);
nir_convert_to_ssa(s);
nir_lower_idiv(s);
+ nir_lower_load_const_to_scalar(s);
do {
progress = false;
@@ -261,13 +258,29 @@ compile_init(struct ir3_compiler *compiler,
so->first_driver_param = so->first_immediate = ctx->s->num_uniforms;
- /* one (vec4) slot for vertex id base: */
- if (so->type == SHADER_VERTEX)
- so->first_immediate++;
+ /* Layout of constant registers:
+ *
+ * num_uniform * vec4 - user consts
+ * 4 * vec4 - UBO addresses
+ * if (vertex shader) {
+ * 1 * vec4 - driver params (IR3_DP_*)
+ * 1 * vec4 - stream-out addresses
+ * }
+ *
+ * TODO this could be made more dynamic, to at least skip sections
+ * that we don't need..
+ */
/* reserve 4 (vec4) slots for ubo base addresses: */
so->first_immediate += 4;
+ if (so->type == SHADER_VERTEX) {
+ /* one (vec4) slot for driver params (see ir3_driver_param): */
+ so->first_immediate++;
+ /* one (vec4) slot for stream-output base addresses: */
+ so->first_immediate++;
+ }
+
return ctx;
}
@@ -637,9 +650,8 @@ create_uniform_indirect(struct ir3_compile *ctx, unsigned n,
mov->cat1.dst_type = TYPE_U32;
ir3_reg_create(mov, 0, 0);
ir3_reg_create(mov, n, IR3_REG_CONST | IR3_REG_RELATIV);
- mov->address = address;
- array_insert(ctx->ir->indirects, mov);
+ ir3_instr_set_address(mov, address);
return mov;
}
@@ -677,9 +689,8 @@ create_indirect_load(struct ir3_compile *ctx, unsigned arrsz, unsigned n,
src->instr = collect;
src->size = arrsz;
src->offset = n;
- mov->address = address;
- array_insert(ctx->ir->indirects, mov);
+ ir3_instr_set_address(mov, address);
return mov;
}
@@ -700,25 +711,21 @@ create_indirect_store(struct ir3_compile *ctx, unsigned arrsz, unsigned n,
dst->size = arrsz;
dst->offset = n;
ir3_reg_create(mov, 0, IR3_REG_SSA)->instr = src;
- mov->address = address;
mov->fanin = collect;
- array_insert(ctx->ir->indirects, mov);
+ ir3_instr_set_address(mov, address);
return mov;
}
static struct ir3_instruction *
-create_input(struct ir3_block *block, struct ir3_instruction *instr,
- unsigned n)
+create_input(struct ir3_block *block, unsigned n)
{
struct ir3_instruction *in;
in = ir3_instr_create(block, -1, OPC_META_INPUT);
in->inout.block = block;
ir3_reg_create(in, n, 0);
- if (instr)
- ir3_reg_create(in, 0, IR3_REG_SSA)->instr = instr;
return in;
}
@@ -750,7 +757,7 @@ create_frag_coord(struct ir3_compile *ctx, unsigned comp)
compile_assert(ctx, !ctx->frag_coord[comp]);
- ctx->frag_coord[comp] = create_input(ctx->block, NULL, 0);
+ ctx->frag_coord[comp] = create_input(ctx->block, 0);
switch (comp) {
case 0: /* .x */
@@ -789,7 +796,7 @@ create_frag_face(struct ir3_compile *ctx, unsigned comp)
case 0: /* .x */
compile_assert(ctx, !ctx->frag_face);
- ctx->frag_face = create_input(block, NULL, 0);
+ ctx->frag_face = create_input(block, 0);
ctx->frag_face->regs[0]->flags |= IR3_REG_HALF;
/* for faceness, we always get -1 or 0 (int).. but TGSI expects
@@ -817,6 +824,14 @@ create_frag_face(struct ir3_compile *ctx, unsigned comp)
}
}
+static struct ir3_instruction *
+create_driver_param(struct ir3_compile *ctx, enum ir3_driver_param dp)
+{
+ /* first four vec4 sysvals reserved for UBOs: */
+ unsigned r = regid(ctx->so->first_driver_param + 4, dp);
+ return create_uniform(ctx, r);
+}
+
/* helper for instructions that produce multiple consecutive scalar
* outputs which need to have a split/fanout meta instruction inserted
*/
@@ -1218,7 +1233,7 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
struct ir3_instruction *load =
ir3_LDG(b, addr, 0, create_immed(b, 1), 0);
load->cat6.type = TYPE_U32;
- load->cat6.offset = off + i * 4; /* byte offset */
+ load->cat6.src_offset = off + i * 4; /* byte offset */
dst[i] = load;
}
}
@@ -1307,7 +1322,7 @@ emit_intrinisic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
* store_output_indirect? or move this into
* create_indirect_store()?
*/
- for (int j = i; j < arr->length; j += 4) {
+ for (int j = i; j < arr->length; j += intr->num_components) {
struct ir3_instruction *split;
split = ir3_instr_create(ctx->block, -1, OPC_META_FO);
@@ -1318,6 +1333,13 @@ emit_intrinisic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
arr->arr[j] = split;
}
}
+ /* fixup fanout/split neighbors: */
+ for (int i = 0; i < arr->length; i++) {
+ arr->arr[i]->cp.right = (i < (arr->length - 1)) ?
+ arr->arr[i+1] : NULL;
+ arr->arr[i]->cp.left = (i > 0) ?
+ arr->arr[i-1] : NULL;
+ }
break;
}
default:
@@ -1372,6 +1394,11 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
dst[i] = create_uniform_indirect(ctx, n,
get_addr(ctx, src[0]));
}
+ /* NOTE: if relative addressing is used, we set constlen in
+ * the compiler (to worst-case value) since we don't know in
+ * the assembler what the max addr reg value can be:
+ */
+ ctx->so->constlen = ctx->s->num_uniforms;
break;
case nir_intrinsic_load_ubo:
case nir_intrinsic_load_ubo_indirect:
@@ -1409,9 +1436,7 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
break;
case nir_intrinsic_load_base_vertex:
if (!ctx->basevertex) {
- /* first four vec4 sysval's reserved for UBOs: */
- unsigned r = regid(ctx->so->first_driver_param + 4, 0);
- ctx->basevertex = create_uniform(ctx, r);
+ ctx->basevertex = create_driver_param(ctx, IR3_DP_VTXID_BASE);
add_sysval_input(ctx, TGSI_SEMANTIC_BASEVERTEX,
ctx->basevertex);
}
@@ -1419,7 +1444,7 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
break;
case nir_intrinsic_load_vertex_id_zero_base:
if (!ctx->vertex_id) {
- ctx->vertex_id = create_input(ctx->block, NULL, 0);
+ ctx->vertex_id = create_input(ctx->block, 0);
add_sysval_input(ctx, TGSI_SEMANTIC_VERTEXID_NOBASE,
ctx->vertex_id);
}
@@ -1427,7 +1452,7 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
break;
case nir_intrinsic_load_instance_id:
if (!ctx->instance_id) {
- ctx->instance_id = create_input(ctx->block, NULL, 0);
+ ctx->instance_id = create_input(ctx->block, 0);
add_sysval_input(ctx, TGSI_SEMANTIC_INSTANCEID,
ctx->instance_id);
}
@@ -1456,7 +1481,7 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
kill = ir3_KILL(b, cond, 0);
array_insert(ctx->ir->predicates, kill);
- ctx->kill[ctx->kill_count++] = kill;
+ array_insert(ctx->ir->keeps, kill);
ctx->so->has_kill = true;
break;
@@ -1950,6 +1975,115 @@ emit_cf_list(struct ir3_compile *ctx, struct exec_list *list)
}
}
+/* emit stream-out code. At this point, the current block is the original
+ * (nir) end block, and nir ensures that all flow control paths terminate
+ * into the end block. We re-purpose the original end block to generate
+ * the 'if (vtxcnt < maxvtxcnt)' condition, then append the conditional
+ * block holding stream-out write instructions, followed by the new end
+ * block:
+ *
+ * blockOrigEnd {
+ * p0.x = (vtxcnt < maxvtxcnt)
+ * // succs: blockStreamOut, blockNewEnd
+ * }
+ * blockStreamOut {
+ * ... stream-out instructions ...
+ * // succs: blockNewEnd
+ * }
+ * blockNewEnd {
+ * }
+ */
+static void
+emit_stream_out(struct ir3_compile *ctx)
+{
+ struct ir3_shader_variant *v = ctx->so;
+ struct ir3 *ir = ctx->ir;
+ struct pipe_stream_output_info *strmout =
+ &ctx->so->shader->stream_output;
+ struct ir3_block *orig_end_block, *stream_out_block, *new_end_block;
+ struct ir3_instruction *vtxcnt, *maxvtxcnt, *cond;
+ struct ir3_instruction *bases[PIPE_MAX_SO_BUFFERS];
+
+ /* create vtxcnt input in input block at top of shader,
+ * so that it is seen as live over the entire duration
+ * of the shader:
+ */
+ vtxcnt = create_input(ctx->in_block, 0);
+ add_sysval_input(ctx, IR3_SEMANTIC_VTXCNT, vtxcnt);
+
+ maxvtxcnt = create_driver_param(ctx, IR3_DP_VTXCNT_MAX);
+
+ /* at this point, we are at the original 'end' block,
+ * re-purpose this block to stream-out condition, then
+ * append stream-out block and new-end block
+ */
+ orig_end_block = ctx->block;
+
+ stream_out_block = ir3_block_create(ir);
+ list_addtail(&stream_out_block->node, &ir->block_list);
+
+ new_end_block = ir3_block_create(ir);
+ list_addtail(&new_end_block->node, &ir->block_list);
+
+ orig_end_block->successors[0] = stream_out_block;
+ orig_end_block->successors[1] = new_end_block;
+ stream_out_block->successors[0] = new_end_block;
+
+ /* setup 'if (vtxcnt < maxvtxcnt)' condition: */
+ cond = ir3_CMPS_S(ctx->block, vtxcnt, 0, maxvtxcnt, 0);
+ cond->regs[0]->num = regid(REG_P0, 0);
+ cond->cat2.condition = IR3_COND_LT;
+
+ /* condition goes on previous block to the conditional,
+ * since it is used to pick which of the two successor
+ * paths to take:
+ */
+ orig_end_block->condition = cond;
+
+ /* switch to stream_out_block to generate the stream-out
+ * instructions:
+ */
+ ctx->block = stream_out_block;
+
+ /* Calculate base addresses based on vtxcnt. Instructions
+ * generated for bases not used in the following loop will be
+ * stripped out in the backend.
+ */
+ for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
+ unsigned stride = strmout->stride[i];
+ struct ir3_instruction *base, *off;
+
+ base = create_uniform(ctx, regid(v->first_driver_param + 5, i));
+
+ /* 24-bit should be enough: */
+ off = ir3_MUL_U(ctx->block, vtxcnt, 0,
+ create_immed(ctx->block, stride * 4), 0);
+
+ bases[i] = ir3_ADD_S(ctx->block, off, 0, base, 0);
+ }
+
+ /* Generate the per-output store instructions: */
+ for (unsigned i = 0; i < strmout->num_outputs; i++) {
+ for (unsigned j = 0; j < strmout->output[i].num_components; j++) {
+ unsigned c = j + strmout->output[i].start_component;
+ struct ir3_instruction *base, *out, *stg;
+
+ base = bases[strmout->output[i].output_buffer];
+ out = ctx->ir->outputs[regid(strmout->output[i].register_index, c)];
+
+ stg = ir3_STG(ctx->block, base, 0, out, 0,
+ create_immed(ctx->block, 1), 0);
+ stg->cat6.type = TYPE_U32;
+ stg->cat6.dst_offset = (strmout->output[i].dst_offset + j) * 4;
+
+ array_insert(ctx->ir->keeps, stg);
+ }
+ }
+
+ /* and finally switch to the new_end_block: */
+ ctx->block = new_end_block;
+}
+
static void
emit_function(struct ir3_compile *ctx, nir_function_impl *impl)
{
@@ -1960,6 +2094,24 @@ emit_function(struct ir3_compile *ctx, nir_function_impl *impl)
* into which we emit the 'end' instruction.
*/
compile_assert(ctx, list_empty(&ctx->block->instr_list));
+
+ /* If stream-out (aka transform-feedback) enabled, emit the
+ * stream-out instructions, followed by a new empty block (into
+ * which the 'end' instruction lands).
+ *
+ * NOTE: it is done in this order, rather than inserting before
+ * we emit end_block, because NIR guarantees that all blocks
+ * flow into end_block, and that end_block has no successors.
+ * So by re-purposing end_block as the first block of stream-
+ * out, we guarantee that all exit paths flow into the stream-
+ * out instructions.
+ */
+ if ((ctx->so->shader->stream_output.num_outputs > 0) &&
+ !ctx->so->key.binning_pass) {
+ debug_assert(ctx->so->type == SHADER_VERTEX);
+ emit_stream_out(ctx);
+ }
+
ir3_END(ctx->block);
}
@@ -1974,7 +2126,7 @@ setup_input(struct ir3_compile *ctx, nir_variable *in)
unsigned semantic_index = in->data.index;
unsigned n = in->data.driver_location;
- DBG("; in: %u:%u, len=%ux%u, loc=%u\n",
+ DBG("; in: %u:%u, len=%ux%u, loc=%u",
semantic_name, semantic_index, array_len,
ncomp, n);
@@ -2045,7 +2197,7 @@ setup_input(struct ir3_compile *ctx, nir_variable *in)
so->inputs[n].inloc + i - 8, use_ldlv);
}
} else {
- instr = create_input(ctx->block, NULL, idx);
+ instr = create_input(ctx->block, idx);
}
ctx->ir->inputs[idx] = instr;
@@ -2069,7 +2221,7 @@ setup_output(struct ir3_compile *ctx, nir_variable *out)
unsigned n = out->data.driver_location;
unsigned comp = 0;
- DBG("; out: %u:%u, len=%ux%u, loc=%u\n",
+ DBG("; out: %u:%u, len=%ux%u, loc=%u",
semantic_name, semantic_index, array_len,
ncomp, n);
@@ -2098,6 +2250,10 @@ setup_output(struct ir3_compile *ctx, nir_variable *out)
so->writes_pos = true;
break;
case TGSI_SEMANTIC_COLOR:
+ if (semantic_index == -1) {
+ semantic_index = 0;
+ so->color0_mrt = 1;
+ }
break;
default:
compile_error(ctx, "unknown FS semantic name: %s\n",
@@ -2136,13 +2292,9 @@ emit_instructions(struct ir3_compile *ctx)
ninputs = exec_list_length(&ctx->s->inputs) * 4;
noutputs = exec_list_length(&ctx->s->outputs) * 4;
- /* we need to allocate big enough outputs array so that
- * we can stuff the kill's at the end. Likewise for vtx
- * shaders, we need to leave room for sysvals:
+ /* for vtx shaders, we need to leave room for sysvals:
*/
- if (ctx->so->type == SHADER_FRAGMENT) {
- noutputs += ARRAY_SIZE(ctx->kill);
- } else if (ctx->so->type == SHADER_VERTEX) {
+ if (ctx->so->type == SHADER_VERTEX) {
ninputs += 8;
}
@@ -2153,9 +2305,7 @@ emit_instructions(struct ir3_compile *ctx)
ctx->in_block = ctx->block;
list_addtail(&ctx->block->node, &ctx->ir->block_list);
- if (ctx->so->type == SHADER_FRAGMENT) {
- ctx->ir->noutputs -= ARRAY_SIZE(ctx->kill);
- } else if (ctx->so->type == SHADER_VERTEX) {
+ if (ctx->so->type == SHADER_VERTEX) {
ctx->ir->ninputs -= 8;
}
@@ -2254,13 +2404,13 @@ fixup_frag_inputs(struct ir3_compile *ctx)
so->pos_regid = regid;
/* r0.x */
- instr = create_input(ctx->in_block, NULL, ir->ninputs);
+ instr = create_input(ctx->in_block, ir->ninputs);
instr->regs[0]->num = regid++;
inputs[ir->ninputs++] = instr;
ctx->frag_pos->regs[1]->instr = instr;
/* r0.y */
- instr = create_input(ctx->in_block, NULL, ir->ninputs);
+ instr = create_input(ctx->in_block, ir->ninputs);
instr->regs[0]->num = regid++;
inputs[ir->ninputs++] = instr;
ctx->frag_pos->regs[2]->instr = instr;
@@ -2270,9 +2420,7 @@ fixup_frag_inputs(struct ir3_compile *ctx)
int
ir3_compile_shader_nir(struct ir3_compiler *compiler,
- struct ir3_shader_variant *so,
- const struct tgsi_token *tokens,
- struct ir3_shader_key key)
+ struct ir3_shader_variant *so)
{
struct ir3_compile *ctx;
struct ir3 *ir;
@@ -2282,7 +2430,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
assert(!so->ir);
- ctx = compile_init(compiler, so, tokens);
+ ctx = compile_init(compiler, so, so->shader->tokens);
if (!ctx) {
DBG("INIT failed!");
ret = -1;
@@ -2307,7 +2455,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
fixup_frag_inputs(ctx);
/* at this point, for binning pass, throw away unneeded outputs: */
- if (key.binning_pass) {
+ if (so->key.binning_pass) {
for (i = 0, j = 0; i < so->outputs_count; i++) {
unsigned name = sem2name(so->outputs[i].semantic);
unsigned idx = sem2idx(so->outputs[i].semantic);
@@ -2332,7 +2480,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
/* if we want half-precision outputs, mark the output registers
* as half:
*/
- if (key.half_precision) {
+ if (so->key.half_precision) {
for (i = 0; i < ir->noutputs; i++) {
struct ir3_instruction *out = ir->outputs[i];
if (!out)
@@ -2353,15 +2501,6 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
}
}
- /* at this point, we want the kill's in the outputs array too,
- * so that they get scheduled (since they have no dst).. we've
- * already ensured that the array is big enough in push_block():
- */
- if (so->type == SHADER_FRAGMENT) {
- for (i = 0; i < ctx->kill_count; i++)
- ir->outputs[ir->noutputs++] = ctx->kill[i];
- }
-
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
printf("BEFORE CP:\n");
ir3_print(ir);
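
Working through the constant-file layout described in compile_init(): for a vertex shader with N user uniform vec4s, first_driver_param stays at N while first_immediate is pushed past the reserved slots. The regid() math used by create_driver_param() and emit_stream_out() then resolves as follows (a worked sketch consistent with the code above):

	/* vec4 slot layout in the const file:
	 *   [0 .. N-1]  user consts
	 *   [N .. N+3]  UBO base addresses (4 slots)
	 *   [N+4]       driver params (IR3_DP_VTXID_BASE, IR3_DP_VTXCNT_MAX, ...)
	 *   [N+5]       stream-out buffer base addresses
	 */
	basevertex = create_uniform(ctx, regid(so->first_driver_param + 4, IR3_DP_VTXID_BASE));
	so_base    = create_uniform(ctx, regid(so->first_driver_param + 5, i));  /* buffer i */
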
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c
index 8c7c80f7aae..be4e4e81109 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c
@@ -291,7 +291,7 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
instr->regs[n+1] = src_reg;
if (src_reg->flags & IR3_REG_RELATIV)
- instr->address = reg->instr->address;
+ ir3_instr_set_address(instr, reg->instr->address);
return;
}
@@ -300,7 +300,7 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
!conflicts(instr->address, reg->instr->address)) {
src_reg->flags = new_flags;
instr->regs[n+1] = src_reg;
- instr->address = reg->instr->address;
+ ir3_instr_set_address(instr, reg->instr->address);
return;
}
@@ -389,7 +389,7 @@ instr_cp(struct ir3_instruction *instr, unsigned *flags)
}
if (instr->address)
- instr->address = instr_cp(instr->address, NULL);
+ ir3_instr_set_address(instr, instr_cp(instr->address, NULL));
return instr;
}
@@ -408,6 +408,10 @@ ir3_cp(struct ir3 *ir)
}
}
+ for (unsigned i = 0; i < ir->keeps_count; i++) {
+ ir->keeps[i] = instr_cp(ir->keeps[i], NULL);
+ }
+
list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
if (block->condition)
block->condition = instr_cp(block->condition, NULL);
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_depth.c b/src/gallium/drivers/freedreno/ir3/ir3_depth.c
index 3a108243479..97df0c2ac99 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_depth.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_depth.c
@@ -156,6 +156,9 @@ ir3_depth(struct ir3 *ir)
if (ir->outputs[i])
ir3_instr_depth(ir->outputs[i]);
+ for (i = 0; i < ir->keeps_count; i++)
+ ir3_instr_depth(ir->keeps[i]);
+
/* We also need to account for if-condition: */
list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
if (block->condition)
@@ -167,6 +170,15 @@ ir3_depth(struct ir3 *ir)
remove_unused_by_block(block);
}
+ /* note that we can end up with unused indirects, but we should
+ * not end up with unused predicates.
+ */
+ for (i = 0; i < ir->indirects_count; i++) {
+ struct ir3_instruction *instr = ir->indirects[i];
+ if (instr->depth == DEPTH_UNUSED)
+ ir->indirects[i] = NULL;
+ }
+
/* cleanup unused inputs: */
for (i = 0; i < ir->ninputs; i++) {
struct ir3_instruction *in = ir->inputs[i];
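
Since ir3_depth() now nulls out DEPTH_UNUSED entries in ir->indirects[], every later pass that walks that array has to tolerate holes; the scheduler's split_addr() below shows the required pattern:

	for (i = 0; i < ir->indirects_count; i++) {
		struct ir3_instruction *indirect = ir->indirects[i];
		if (!indirect)    /* stripped as unused by ir3_depth() */
			continue;
		/* ... remap indirect->address as needed ... */
	}
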
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_group.c b/src/gallium/drivers/freedreno/ir3/ir3_group.c
index 70d9b08e019..ca28aefd502 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_group.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_group.c
@@ -236,6 +236,11 @@ find_neighbors(struct ir3 *ir)
instr_find_neighbors(instr);
}
}
+
+ for (i = 0; i < ir->keeps_count; i++) {
+ struct ir3_instruction *instr = ir->keeps[i];
+ instr_find_neighbors(instr);
+ }
}
void
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c
index f4a4223ae17..e94293f6d6b 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c
@@ -182,14 +182,14 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
*/
ctx->has_samp = true;
regmask_set(&needs_sy, n->regs[0]);
- } else if (is_mem(n)) {
+ } else if (is_load(n)) {
regmask_set(&needs_sy, n->regs[0]);
}
/* both tex/sfu appear to not always immediately consume
* their src register(s):
*/
- if (is_tex(n) || is_sfu(n) || is_mem(n)) {
+ if (is_tex(n) || is_sfu(n) || is_load(n)) {
foreach_src(reg, n) {
if (reg_gpr(reg))
regmask_set(&needs_ss_war, reg);
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_print.c b/src/gallium/drivers/freedreno/ir3/ir3_print.c
index f377982dd5e..07e03d26908 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_print.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_print.c
@@ -175,6 +175,20 @@ print_instr(struct ir3_instruction *instr, int lvl)
printf("]");
}
+ if (instr->cp.left) {
+ printf(", left=_");
+ printf("[");
+ print_instr_name(instr->cp.left);
+ printf("]");
+ }
+
+ if (instr->cp.right) {
+ printf(", right=_");
+ printf("[");
+ print_instr_name(instr->cp.right);
+ printf("]");
+ }
+
if (is_meta(instr)) {
if (instr->opc == OPC_META_FO) {
printf(", off=%d", instr->fo.off);
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
index e5aba859fab..eaf3b3c35e8 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
@@ -241,6 +241,21 @@ ir3_ra_alloc_reg_set(void *memctx)
return set;
}
+/* additional block-data (per-block) */
+struct ir3_ra_block_data {
+ BITSET_WORD *def; /* variables defined before used in block */
+ BITSET_WORD *use; /* variables used before defined in block */
+ BITSET_WORD *livein; /* which defs reach entry point of block */
+ BITSET_WORD *liveout; /* which defs reach exit point of block */
+};
+
+/* additional instruction-data (per-instruction) */
+struct ir3_ra_instr_data {
+ /* cached instruction 'definer' info: */
+ struct ir3_instruction *defn;
+ int off, sz, cls;
+};
+
/* register-assign context, per-shader */
struct ir3_ra_ctx {
struct ir3 *ir;
@@ -254,14 +269,7 @@ struct ir3_ra_ctx {
unsigned class_base[total_class_count];
unsigned instr_cnt;
unsigned *def, *use; /* def/use table */
-};
-
-/* additional block-data (per-block) */
-struct ir3_ra_block_data {
- BITSET_WORD *def; /* variables defined before used in block */
- BITSET_WORD *use; /* variables used before defined in block */
- BITSET_WORD *livein; /* which defs reach entry point of block */
- BITSET_WORD *liveout; /* which defs reach exit point of block */
+ struct ir3_ra_instr_data *instrd;
};
static bool
@@ -291,8 +299,6 @@ is_temp(struct ir3_register *reg)
{
if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED))
return false;
- if (reg->flags & IR3_REG_RELATIV) // TODO
- return false;
if ((reg->num == regid(REG_A0, 0)) ||
(reg->num == regid(REG_P0, 0)))
return false;
@@ -309,28 +315,45 @@ writes_gpr(struct ir3_instruction *instr)
}
static struct ir3_instruction *
-get_definer(struct ir3_instruction *instr, int *sz, int *off)
+get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr,
+ int *sz, int *off)
{
+ struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
struct ir3_instruction *d = NULL;
+
+ if (instr->fanin)
+ return get_definer(ctx, instr->fanin, sz, off);
+
+ if (id->defn) {
+ *sz = id->sz;
+ *off = id->off;
+ return id->defn;
+ }
+
if (is_meta(instr) && (instr->opc == OPC_META_FI)) {
/* What about the case where the collect is a subset of the array?
* We would need to find the distance between where the actual array
* starts and the fanin.. that probably doesn't happen currently.
*/
struct ir3_register *src;
+ int dsz, doff;
/* note: don't use foreach_ssa_src as this gets called once
* while assigning regs (which clears SSA flag)
*/
- foreach_src(src, instr) {
+ foreach_src_n(src, n, instr) {
+ struct ir3_instruction *dd;
if (!src->instr)
continue;
- if ((!d) || (src->instr->ip < d->ip))
- d = src->instr;
- }
- *sz = instr->regs_count - 1;
- *off = 0;
+ dd = get_definer(ctx, src->instr, &dsz, &doff);
+
+ if ((!d) || (dd->ip < d->ip)) {
+ d = dd;
+ *sz = dsz;
+ *off = doff - n;
+ }
+ }
} else if (instr->cp.right || instr->cp.left) {
/* covers also the meta:fo case, which ends up w/ single
@@ -386,7 +409,7 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
struct ir3_instruction *dd;
int dsz, doff;
- dd = get_definer(phi, &dsz, &doff);
+ dd = get_definer(ctx, phi, &dsz, &doff);
*sz = MAX2(*sz, dsz);
*off = doff;
@@ -401,6 +424,7 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
* the phi, so we don't need to chase definers
*/
struct ir3_register *src;
+ struct ir3_instruction *dd = d;
/* note: don't use foreach_ssa_src as this gets called once
* while assigning regs (which clears SSA flag)
@@ -408,16 +432,18 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
foreach_src(src, d) {
if (!src->instr)
continue;
- if (src->instr->ip < d->ip)
- d = src->instr;
+ if (src->instr->ip < dd->ip)
+ dd = src->instr;
}
+
+ d = dd;
}
if (is_meta(d) && (d->opc == OPC_META_FO)) {
struct ir3_instruction *dd;
int dsz, doff;
- dd = get_definer(d->regs[1]->instr, &dsz, &doff);
+ dd = get_definer(ctx, d->regs[1]->instr, &dsz, &doff);
/* by definition, should come before: */
debug_assert(dd->ip < d->ip);
@@ -429,9 +455,30 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
d = dd;
}
+ id->defn = d;
+ id->sz = *sz;
+ id->off = *off;
+
return d;
}
+static void
+ra_block_find_definers(struct ir3_ra_ctx *ctx, struct ir3_block *block)
+{
+ list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+ struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
+ if (instr->regs_count == 0)
+ continue;
+ /* couple special cases: */
+ if (writes_addr(instr) || writes_pred(instr)) {
+ id->cls = -1;
+ continue;
+ }
+ id->defn = get_definer(ctx, instr, &id->sz, &id->off);
+ id->cls = size_to_class(id->sz, is_half(id->defn));
+ }
+}
+
/* give each instruction a name (and ip), and count up the # of names
* of each class
*/
@@ -439,8 +486,11 @@ static void
ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
{
list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
- struct ir3_instruction *defn;
- int cls, sz, off;
+ struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
+
+#ifdef DEBUG
+ instr->name = ~0;
+#endif
ctx->instr_cnt++;
@@ -450,9 +500,7 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
if (!writes_gpr(instr))
continue;
- defn = get_definer(instr, &sz, &off);
-
- if (defn != instr)
+ if (id->defn != instr)
continue;
/* arrays which don't fit in one of the pre-defined class
@@ -460,9 +508,8 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
*
* TODO but we still need to allocate names for them, don't we??
*/
- cls = size_to_class(sz, is_half(defn));
- if (cls >= 0) {
- instr->name = ctx->class_alloc_count[cls]++;
+ if (id->cls >= 0) {
+ instr->name = ctx->class_alloc_count[id->cls]++;
ctx->alloc_count++;
}
}
@@ -471,8 +518,16 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
static void
ra_init(struct ir3_ra_ctx *ctx)
{
+ unsigned n;
+
ir3_clear_mark(ctx->ir);
- ir3_count_instructions(ctx->ir);
+ n = ir3_count_instructions(ctx->ir);
+
+ ctx->instrd = rzalloc_array(NULL, struct ir3_ra_instr_data, n);
+
+ list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) {
+ ra_block_find_definers(ctx, block);
+ }
list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) {
ra_block_name_instructions(ctx, block);
@@ -488,6 +543,7 @@ ra_init(struct ir3_ra_ctx *ctx)
}
ctx->g = ra_alloc_interference_graph(ctx->set->regs, ctx->alloc_count);
+ ralloc_steal(ctx->g, ctx->instrd);
ctx->def = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
ctx->use = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
}
@@ -555,39 +611,36 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
*/
if (writes_gpr(instr)) {
- struct ir3_instruction *defn;
- int cls, sz, off;
+ struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
- defn = get_definer(instr, &sz, &off);
- if (defn == instr) {
+ if (id->defn == instr) {
/* arrays which don't fit in one of the pre-defined class
* sizes are pre-colored:
*/
- cls = size_to_class(sz, is_half(defn));
- if (cls >= 0) {
- unsigned name = ra_name(ctx, cls, defn);
+ if (id->cls >= 0) {
+ unsigned name = ra_name(ctx, id->cls, id->defn);
- ctx->def[name] = defn->ip;
- ctx->use[name] = defn->ip;
+ ctx->def[name] = id->defn->ip;
+ ctx->use[name] = id->defn->ip;
/* since we are in SSA at this point: */
debug_assert(!BITSET_TEST(bd->use, name));
BITSET_SET(bd->def, name);
- if (is_half(defn)) {
+ if (is_half(id->defn)) {
ra_set_node_class(ctx->g, name,
- ctx->set->half_classes[cls - class_count]);
+ ctx->set->half_classes[id->cls - class_count]);
} else {
ra_set_node_class(ctx->g, name,
- ctx->set->classes[cls]);
+ ctx->set->classes[id->cls]);
}
/* extend the live range for phi srcs, which may come
* from the bottom of the loop
*/
- if (defn->regs[0]->flags & IR3_REG_PHI_SRC) {
- struct ir3_instruction *phi = defn->regs[0]->instr;
+ if (id->defn->regs[0]->flags & IR3_REG_PHI_SRC) {
+ struct ir3_instruction *phi = id->defn->regs[0]->instr;
foreach_ssa_src(src, phi) {
/* if src is after phi, then we need to extend
* the liverange to the end of src's block:
@@ -606,13 +659,10 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
foreach_ssa_src(src, instr) {
if (writes_gpr(src)) {
- struct ir3_instruction *srcdefn;
- int cls, sz, off;
+ struct ir3_ra_instr_data *id = &ctx->instrd[src->ip];
- srcdefn = get_definer(src, &sz, &off);
- cls = size_to_class(sz, is_half(srcdefn));
- if (cls >= 0) {
- unsigned name = ra_name(ctx, cls, srcdefn);
+ if (id->cls >= 0) {
+ unsigned name = ra_name(ctx, id->cls, id->defn);
ctx->use[name] = MAX2(ctx->use[name], instr->ip);
if (!BITSET_TEST(bd->def, name))
BITSET_SET(bd->use, name);
@@ -704,13 +754,10 @@ ra_add_interference(struct ir3_ra_ctx *ctx)
/* need to fix things up to keep outputs live: */
for (unsigned i = 0; i < ir->noutputs; i++) {
struct ir3_instruction *instr = ir->outputs[i];
- struct ir3_instruction *defn;
- int cls, sz, off;
+ struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
- defn = get_definer(instr, &sz, &off);
- cls = size_to_class(sz, is_half(defn));
- if (cls >= 0) {
- unsigned name = ra_name(ctx, cls, defn);
+ if (id->cls >= 0) {
+ unsigned name = ra_name(ctx, id->cls, id->defn);
ctx->use[name] = ctx->instr_cnt;
}
}
@@ -780,15 +827,12 @@ static void
reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register *reg,
struct ir3_instruction *instr)
{
- struct ir3_instruction *defn;
- int cls, sz, off;
+ struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
- defn = get_definer(instr, &sz, &off);
- cls = size_to_class(sz, is_half(defn));
- if (cls >= 0) {
- unsigned name = ra_name(ctx, cls, defn);
+ if (id->cls >= 0) {
+ unsigned name = ra_name(ctx, id->cls, id->defn);
unsigned r = ra_get_node_reg(ctx->g, name);
- unsigned num = ctx->set->ra_reg_to_gpr[r] + off;
+ unsigned num = ctx->set->ra_reg_to_gpr[r] + id->off;
if (reg->flags & IR3_REG_RELATIV)
num += reg->offset;
@@ -796,7 +840,7 @@ reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register *reg,
reg->num = num;
reg->flags &= ~(IR3_REG_SSA | IR3_REG_PHI_SRC);
- if (is_half(defn))
+ if (is_half(id->defn))
reg->flags |= IR3_REG_HALF;
}
}
@@ -851,19 +895,16 @@ ra_alloc(struct ir3_ra_ctx *ctx)
for (j = 0; i < ir->ninputs; i++) {
struct ir3_instruction *instr = ir->inputs[i];
if (instr) {
- struct ir3_instruction *defn;
- int cls, sz, off;
+ struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
- defn = get_definer(instr, &sz, &off);
- if (defn == instr) {
+ if (id->defn == instr) {
unsigned name, reg;
- cls = size_to_class(sz, is_half(defn));
- name = ra_name(ctx, cls, defn);
- reg = ctx->set->gpr_to_ra_reg[cls][j];
+ name = ra_name(ctx, id->cls, id->defn);
+ reg = ctx->set->gpr_to_ra_reg[id->cls][j];
ra_set_node_reg(ctx->g, name, reg);
- j += sz;
+ j += id->sz;
}
}
}
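
The instrd[] table memoizes get_definer() per instr->ip, so the recursive definer walk runs once per instruction (in ra_block_find_definers()) instead of once per query; cls also caches the -1 sentinel for addr/pred writers. Everywhere else in RA the lookup now reduces to:

	struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
	if (id->cls >= 0) {
		unsigned name = ra_name(ctx, id->cls, id->defn);
		/* update def/use ranges or interference for 'name' */
	}
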
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_sched.c b/src/gallium/drivers/freedreno/ir3/ir3_sched.c
index 49a4426d163..2ee325518f7 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_sched.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_sched.c
@@ -80,12 +80,12 @@ schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
list_delinit(&instr->node);
if (writes_addr(instr)) {
- assert(ctx->addr == NULL);
+ debug_assert(ctx->addr == NULL);
ctx->addr = instr;
}
if (writes_pred(instr)) {
- assert(ctx->pred == NULL);
+ debug_assert(ctx->pred == NULL);
ctx->pred = instr;
}
@@ -180,13 +180,13 @@ check_conflict(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
* free:
*/
if (writes_addr(instr) && ctx->addr) {
- assert(ctx->addr != instr);
+ debug_assert(ctx->addr != instr);
notes->addr_conflict = true;
return true;
}
if (writes_pred(instr) && ctx->pred) {
- assert(ctx->pred != instr);
+ debug_assert(ctx->pred != instr);
notes->pred_conflict = true;
return true;
}
@@ -261,6 +261,20 @@ instr_eligibility(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
return 0;
}
+/* could an instruction be scheduled if the specified ssa src was scheduled? */
+static bool
+could_sched(struct ir3_instruction *instr, struct ir3_instruction *src)
+{
+ struct ir3_instruction *other_src;
+ foreach_ssa_src(other_src, instr) {
+ /* if dependency not scheduled, we aren't ready yet: */
+ if ((src != other_src) && !is_scheduled(other_src)) {
+ return false;
+ }
+ }
+ return true;
+}
+
/* move eligible instructions to the priority list: */
static unsigned
add_eligible_instrs(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
@@ -272,6 +286,31 @@ add_eligible_instrs(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
int e = instr_eligibility(ctx, notes, instr);
if (e < 0)
continue;
+
+ /* For instructions that write the address register we need to
+ * make sure there is at least one instruction that uses the
+ * addr value which is otherwise ready.
+ *
+ * TODO if any instructions use pred register and have other
+ * src args, we would need to do the same for writes_pred()..
+ */
+ if (unlikely(writes_addr(instr))) {
+ struct ir3 *ir = instr->block->shader;
+ bool ready = false;
+ for (unsigned i = 0; (i < ir->indirects_count) && !ready; i++) {
+ struct ir3_instruction *indirect = ir->indirects[i];
+ if (!indirect)
+ continue;
+ if (indirect->address != instr)
+ continue;
+ ready = could_sched(indirect, instr);
+ }
+
+ /* nothing could be scheduled, so keep looking: */
+ if (!ready)
+ continue;
+ }
+
min_delay = MIN2(min_delay, e);
if (e == 0) {
/* remove from unscheduled list and into priority queue: */
@@ -287,20 +326,25 @@ add_eligible_instrs(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
* instructions which depend on the current address register
* to a clone of the instruction which wrote the address reg.
*/
-static void
+static struct ir3_instruction *
split_addr(struct ir3_sched_ctx *ctx)
{
- struct ir3 *ir = ctx->addr->block->shader;
+ struct ir3 *ir;
struct ir3_instruction *new_addr = NULL;
unsigned i;
debug_assert(ctx->addr);
+ ir = ctx->addr->block->shader;
+
for (i = 0; i < ir->indirects_count; i++) {
struct ir3_instruction *indirect = ir->indirects[i];
+ if (!indirect)
+ continue;
+
/* skip instructions already scheduled: */
- if (indirect->flags & IR3_INSTR_MARK)
+ if (is_scheduled(indirect))
continue;
/* remap remaining instructions using current addr
@@ -312,32 +356,36 @@ split_addr(struct ir3_sched_ctx *ctx)
/* original addr is scheduled, but new one isn't: */
new_addr->flags &= ~IR3_INSTR_MARK;
}
- indirect->address = new_addr;
+ ir3_instr_set_address(indirect, new_addr);
}
}
/* all remaining indirects remapped to new addr: */
ctx->addr = NULL;
+
+ return new_addr;
}
/* "spill" the predicate register by remapping any unscheduled
* instructions which depend on the current predicate register
* to a clone of the instruction which wrote the predicate reg.
*/
-static void
+static struct ir3_instruction *
split_pred(struct ir3_sched_ctx *ctx)
{
- struct ir3 *ir = ctx->pred->block->shader;
+ struct ir3 *ir;
struct ir3_instruction *new_pred = NULL;
unsigned i;
debug_assert(ctx->pred);
+ ir = ctx->pred->block->shader;
+
for (i = 0; i < ir->predicates_count; i++) {
struct ir3_instruction *predicated = ir->predicates[i];
/* skip instructions already scheduled: */
- if (predicated->flags & IR3_INSTR_MARK)
+ if (is_scheduled(predicated))
continue;
/* remap remaining instructions using current pred
@@ -358,6 +406,8 @@ split_pred(struct ir3_sched_ctx *ctx)
/* all remaining predicated remapped to new pred: */
ctx->pred = NULL;
+
+ return new_pred;
}
static void
@@ -407,20 +457,32 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
schedule(ctx, instr);
} else if (delay == ~0) {
+ struct ir3_instruction *new_instr = NULL;
+
/* nothing available to schedule.. if we are blocked on
* address/predicate register conflict, then break the
* deadlock by cloning the instruction that wrote that
* reg:
*/
if (notes.addr_conflict) {
- split_addr(ctx);
+ new_instr = split_addr(ctx);
} else if (notes.pred_conflict) {
- split_pred(ctx);
+ new_instr = split_pred(ctx);
} else {
debug_assert(0);
ctx->error = true;
return;
}
+
+ if (new_instr) {
+ list_del(&new_instr->node);
+ list_addtail(&new_instr->node, &unscheduled_list);
+ /* the original instr that wrote addr/pred may have
+ * originated from a different block:
+ */
+ new_instr->block = block;
+ }
+
} else {
/* and if we run out of instructions that can be scheduled,
* then it is time for nop's:
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
index b5b038100cc..312174c0c6d 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
@@ -46,7 +46,8 @@ delete_variant(struct ir3_shader_variant *v)
{
if (v->ir)
ir3_destroy(v->ir);
- fd_bo_del(v->bo);
+ if (v->bo)
+ fd_bo_del(v->bo);
free(v);
}
@@ -139,6 +140,32 @@ assemble_variant(struct ir3_shader_variant *v)
memcpy(fd_bo_map(v->bo), bin, sz);
+ if (fd_mesa_debug & FD_DBG_DISASM) {
+ struct ir3_shader_key key = v->key;
+ DBG("disassemble: type=%d, k={bp=%u,cts=%u,hp=%u}", v->type,
+ key.binning_pass, key.color_two_side, key.half_precision);
+ ir3_shader_disasm(v, bin);
+ }
+
+ if (fd_mesa_debug & FD_DBG_SHADERDB) {
+ /* print generic shader info: */
+ fprintf(stderr, "SHADER-DB: %s prog %d/%d: %u instructions, %u dwords\n",
+ ir3_shader_stage(v->shader),
+ v->shader->id, v->id,
+ v->info.instrs_count,
+ v->info.sizedwords);
+ fprintf(stderr, "SHADER-DB: %s prog %d/%d: %u half, %u full\n",
+ ir3_shader_stage(v->shader),
+ v->shader->id, v->id,
+ v->info.max_half_reg + 1,
+ v->info.max_reg + 1);
+ fprintf(stderr, "SHADER-DB: %s prog %d/%d: %u const, %u constlen\n",
+ ir3_shader_stage(v->shader),
+ v->shader->id, v->id,
+ v->info.max_const + 1,
+ v->constlen);
+ }
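+ /* With the format strings above, a shader-db run prints triplets
+ * like the following (values illustrative):
+ *
+ *	SHADER-DB: VERT prog 3/1: 42 instructions, 64 dwords
+ *	SHADER-DB: VERT prog 3/1: 0 half, 7 full
+ *	SHADER-DB: VERT prog 3/1: 12 const, 16 constlen
+ */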
+
free(bin);
/* no need to keep the ir around beyond this point: */
@@ -150,12 +177,12 @@ static struct ir3_shader_variant *
create_variant(struct ir3_shader *shader, struct ir3_shader_key key)
{
struct ir3_shader_variant *v = CALLOC_STRUCT(ir3_shader_variant);
- const struct tgsi_token *tokens = shader->tokens;
int ret;
if (!v)
return NULL;
+ v->id = ++shader->variant_count;
v->shader = shader;
v->key = key;
v->type = shader->type;
@@ -163,10 +190,10 @@ create_variant(struct ir3_shader *shader, struct ir3_shader_key key)
if (fd_mesa_debug & FD_DBG_DISASM) {
DBG("dump tgsi: type=%d, k={bp=%u,cts=%u,hp=%u}", shader->type,
key.binning_pass, key.color_two_side, key.half_precision);
- tgsi_dump(tokens, 0);
+ tgsi_dump(shader->tokens, 0);
}
- ret = ir3_compile_shader_nir(shader->compiler, v, tokens, key);
+ ret = ir3_compile_shader_nir(shader->compiler, v);
if (ret) {
debug_error("compile failed!");
goto fail;
@@ -178,12 +205,6 @@ create_variant(struct ir3_shader *shader, struct ir3_shader_key key)
goto fail;
}
- if (fd_mesa_debug & FD_DBG_DISASM) {
- DBG("disassemble: type=%d, k={bp=%u,cts=%u,hp=%u}", v->type,
- key.binning_pass, key.color_two_side, key.half_precision);
- disasm_a3xx(fd_bo_map(v->bo), v->info.sizedwords, 0, v->type);
- }
-
return v;
fail:
@@ -228,8 +249,10 @@ ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key)
/* compile new variant if it doesn't exist already: */
v = create_variant(shader, key);
- v->next = shader->variants;
- shader->variants = v;
+ if (v) {
+ v->next = shader->variants;
+ shader->variants = v;
+ }
return v;
}
@@ -249,13 +272,372 @@ ir3_shader_destroy(struct ir3_shader *shader)
}
struct ir3_shader *
-ir3_shader_create(struct pipe_context *pctx, const struct tgsi_token *tokens,
+ir3_shader_create(struct pipe_context *pctx,
+ const struct pipe_shader_state *cso,
enum shader_t type)
{
struct ir3_shader *shader = CALLOC_STRUCT(ir3_shader);
shader->compiler = fd_context(pctx)->screen->compiler;
+ shader->id = ++shader->compiler->shader_count;
shader->pctx = pctx;
shader->type = type;
- shader->tokens = tgsi_dup_tokens(tokens);
+ shader->tokens = tgsi_dup_tokens(cso->tokens);
+ shader->stream_output = cso->stream_output;
+ if (fd_mesa_debug & FD_DBG_SHADERDB) {
+ /* if this is a shader-db run, create a standard variant
+ * immediately (as otherwise nothing would trigger the shader
+ * to actually be compiled):
+ */
+ static struct ir3_shader_key key = {};
+ ir3_shader_variant(shader, key);
+ }
return shader;
}
+
+static void dump_reg(const char *name, uint32_t r)
+{
+ if (r != regid(63,0))
+ debug_printf("; %s: r%d.%c\n", name, r >> 2, "xyzw"[r & 0x3]);
+}
+
+static void dump_semantic(struct ir3_shader_variant *so,
+ unsigned sem, const char *name)
+{
+ uint32_t regid;
+ regid = ir3_find_output_regid(so, ir3_semantic_name(sem, 0));
+ dump_reg(name, regid);
+}
+
+void
+ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin)
+{
+ struct ir3 *ir = so->ir;
+ struct ir3_register *reg;
+ const char *type = ir3_shader_stage(so->shader);
+ uint8_t regid;
+ unsigned i;
+
+ for (i = 0; i < ir->ninputs; i++) {
+ if (!ir->inputs[i]) {
+ debug_printf("; in%d unused\n", i);
+ continue;
+ }
+ reg = ir->inputs[i]->regs[0];
+ regid = reg->num;
+ debug_printf("@in(%sr%d.%c)\tin%d\n",
+ (reg->flags & IR3_REG_HALF) ? "h" : "",
+ (regid >> 2), "xyzw"[regid & 0x3], i);
+ }
+
+ for (i = 0; i < ir->noutputs; i++) {
+ if (!ir->outputs[i]) {
+ debug_printf("; out%d unused\n", i);
+ continue;
+ }
+ /* kill shows up as a virtual output.. skip it! */
+ if (is_kill(ir->outputs[i]))
+ continue;
+ reg = ir->outputs[i]->regs[0];
+ regid = reg->num;
+ debug_printf("@out(%sr%d.%c)\tout%d\n",
+ (reg->flags & IR3_REG_HALF) ? "h" : "",
+ (regid >> 2), "xyzw"[regid & 0x3], i);
+ }
+
+ for (i = 0; i < so->immediates_count; i++) {
+ debug_printf("@const(c%d.x)\t", so->first_immediate + i);
+ debug_printf("0x%08x, 0x%08x, 0x%08x, 0x%08x\n",
+ so->immediates[i].val[0],
+ so->immediates[i].val[1],
+ so->immediates[i].val[2],
+ so->immediates[i].val[3]);
+ }
+
+ disasm_a3xx(bin, so->info.sizedwords, 0, so->type);
+
+ debug_printf("; %s: outputs:", type);
+ for (i = 0; i < so->outputs_count; i++) {
+ uint8_t regid = so->outputs[i].regid;
+ ir3_semantic sem = so->outputs[i].semantic;
+ debug_printf(" r%d.%c (%u:%u)",
+ (regid >> 2), "xyzw"[regid & 0x3],
+ sem2name(sem), sem2idx(sem));
+ }
+ debug_printf("\n");
+ debug_printf("; %s: inputs:", type);
+ for (i = 0; i < so->inputs_count; i++) {
+ uint8_t regid = so->inputs[i].regid;
+ ir3_semantic sem = so->inputs[i].semantic;
+ debug_printf(" r%d.%c (%u:%u,cm=%x,il=%u,b=%u)",
+ (regid >> 2), "xyzw"[regid & 0x3],
+ sem2name(sem), sem2idx(sem),
+ so->inputs[i].compmask,
+ so->inputs[i].inloc,
+ so->inputs[i].bary);
+ }
+ debug_printf("\n");
+
+ /* print generic shader info: */
+ debug_printf("; %s prog %d/%d: %u instructions, %d half, %d full\n",
+ type, so->shader->id, so->id,
+ so->info.instrs_count,
+ so->info.max_half_reg + 1,
+ so->info.max_reg + 1);
+
+ debug_printf("; %d const, %u constlen\n",
+ so->info.max_const + 1,
+ so->constlen);
+
+ /* print shader type specific info: */
+ switch (so->type) {
+ case SHADER_VERTEX:
+ dump_semantic(so, TGSI_SEMANTIC_POSITION, "pos");
+ dump_semantic(so, TGSI_SEMANTIC_PSIZE, "psize");
+ break;
+ case SHADER_FRAGMENT:
+ dump_reg("pos (bary)", so->pos_regid);
+ dump_semantic(so, TGSI_SEMANTIC_POSITION, "posz");
+ dump_semantic(so, TGSI_SEMANTIC_COLOR, "color");
+ /* these two are hard-coded since we don't know how to
+ * program them to anything but all 0's...
+ */
+ if (so->frag_coord)
+ debug_printf("; fragcoord: r0.x\n");
+ if (so->frag_face)
+ debug_printf("; fragface: hr0.x\n");
+ break;
+ case SHADER_COMPUTE:
+ break;
+ }
+
+ debug_printf("\n");
+}
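+/* For reference, the regid encoding decoded above packs the component
+ * into the low two bits and the register number above them, e.g.:
+ *
+ *	uint8_t regid = 0x07;
+ *	regid >> 2 == 1, "xyzw"[regid & 0x3] == 'w'   =>   r1.w
+ */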
+
+/* This has to reach into the fd_context a bit more than the rest of
+ * ir3, but it needs to be aligned with the compiler, so both agree
+ * on which const regs hold what.  And the logic is identical between
+ * a3xx/a4xx; the only differences are small details in the actual
+ * CP_LOAD_STATE packets (which are handled inside the generation
+ * specific ctx->emit_const(_bo)() fxns)
+ */
+
+#include "freedreno_resource.h"
+
+static void
+emit_user_consts(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
+ struct fd_constbuf_stateobj *constbuf)
+{
+ struct fd_context *ctx = fd_context(v->shader->pctx);
+ const unsigned index = 0; /* user consts are index 0 */
+ /* TODO save/restore dirty_mask for binning pass instead: */
+ uint32_t dirty_mask = constbuf->enabled_mask;
+
+ if (dirty_mask & (1 << index)) {
+ struct pipe_constant_buffer *cb = &constbuf->cb[index];
+ unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
+
+ /* in particular, with the binning shader we may end up with
+ * unused consts, i.e. we could end up with a constlen that is
+ * smaller than first_driver_param.  In that case truncate
+ * the user consts early to avoid a HLSQ lockup caused by
+ * writing too many consts.
+ */
+ uint32_t max_const = MIN2(v->first_driver_param, v->constlen);
+
+ /* size should be a multiple of vec4's: */
+ assert(size == align(size, 4));
+
+ /* and even if the start of the const buffer is before
+ * first_immediate, the end may not be:
+ */
+ size = MIN2(size, 4 * max_const);
+
+ if (size > 0) {
+ fd_wfi(ctx, ring);
+ ctx->emit_const(ring, v->type, 0,
+ cb->buffer_offset, size,
+ cb->user_buffer, cb->buffer);
+ constbuf->dirty_mask &= ~(1 << index);
+ }
+ }
+}
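+/* Worked example of the truncation above: a binning variant with
+ * first_driver_param = 16 but constlen = 10 gives max_const = 10 vec4's,
+ * so a 256-byte UBO 0 (size = 64 dwords) gets clamped to 4 * 10 = 40
+ * dwords before being emitted.
+ */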
+
+static void
+emit_ubos(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
+ struct fd_constbuf_stateobj *constbuf)
+{
+ uint32_t offset = v->first_driver_param; /* UBOs after user consts */
+ if (v->constlen > offset) {
+ struct fd_context *ctx = fd_context(v->shader->pctx);
+ uint32_t params = MIN2(4, v->constlen - offset) * 4;
+ uint32_t offsets[params];
+ struct fd_bo *bos[params];
+
+ for (uint32_t i = 0; i < params; i++) {
+ const uint32_t index = i + 1; /* UBOs start at index 1 */
+ struct pipe_constant_buffer *cb = &constbuf->cb[index];
+ assert(!cb->user_buffer);
+
+ if ((constbuf->enabled_mask & (1 << index)) && cb->buffer) {
+ offsets[i] = cb->buffer_offset;
+ bos[i] = fd_resource(cb->buffer)->bo;
+ } else {
+ offsets[i] = 0;
+ bos[i] = NULL;
+ }
+ }
+
+ fd_wfi(ctx, ring);
+ ctx->emit_const_bo(ring, v->type, false, offset * 4, params, bos, offsets);
+ }
+}
+
+static void
+emit_immediates(struct ir3_shader_variant *v, struct fd_ringbuffer *ring)
+{
+ struct fd_context *ctx = fd_context(v->shader->pctx);
+ int size = v->immediates_count;
+ uint32_t base = v->first_immediate;
+
+ /* truncate size to avoid writing constants that shader
+ * does not use:
+ */
+ size = MIN2(size + base, v->constlen) - base;
+
+ /* convert out of vec4: */
+ base *= 4;
+ size *= 4;
+
+ if (size > 0) {
+ fd_wfi(ctx, ring);
+ ctx->emit_const(ring, v->type, base,
+ 0, size, v->immediates[0].val, NULL);
+ }
+}
+
+/* emit stream-out buffers: */
+static void
+emit_tfbos(struct ir3_shader_variant *v, struct fd_ringbuffer *ring)
+{
+ uint32_t offset = v->first_driver_param + 5; /* streamout addresses after driver-params */
+ if (v->constlen > offset) {
+ struct fd_context *ctx = fd_context(v->shader->pctx);
+ struct fd_streamout_stateobj *so = &ctx->streamout;
+ struct pipe_stream_output_info *info = &v->shader->stream_output;
+ uint32_t params = 4;
+ uint32_t offsets[params];
+ struct fd_bo *bos[params];
+
+ for (uint32_t i = 0; i < params; i++) {
+ struct pipe_stream_output_target *target = so->targets[i];
+
+ if (target) {
+ offsets[i] = (so->offsets[i] * info->stride[i] * 4) +
+ target->buffer_offset;
+ bos[i] = fd_resource(target->buffer)->bo;
+ } else {
+ offsets[i] = 0;
+ bos[i] = NULL;
+ }
+ }
+
+ fd_wfi(ctx, ring);
+ ctx->emit_const_bo(ring, v->type, true, offset * 4, params, bos, offsets);
+ }
+}
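+/* Const-file layout implied by the helpers above (vec4 indices, derived
+ * from the offsets used in this file):
+ *
+ *	c0 .. first_driver_param - 1	user consts (immediates at
+ *					first_immediate)
+ *	first_driver_param + 0 .. 3	UBO base pointers
+ *	first_driver_param + 4		driver params (IR3_DP_*)
+ *	first_driver_param + 5		streamout buffer addresses
+ */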
+
+static uint32_t
+max_tf_vtx(struct ir3_shader_variant *v)
+{
+ struct fd_context *ctx = fd_context(v->shader->pctx);
+ struct fd_streamout_stateobj *so = &ctx->streamout;
+ struct pipe_stream_output_info *info = &v->shader->stream_output;
+ uint32_t maxvtxcnt = 0x7fffffff;
+
+ if (v->key.binning_pass)
+ return 0;
+ if (v->shader->stream_output.num_outputs == 0)
+ return 0;
+ if (so->num_targets == 0)
+ return 0;
+
+ /* offset to write to is:
+ *
+ * total_vtxcnt = vtxcnt + offsets[i]
+ * offset = total_vtxcnt * stride[i]
+ *
+ * offset = vtxcnt * stride[i] ; calculated in shader
+ * + offsets[i] * stride[i] ; calculated at emit_tfbos()
+ *
+ * assuming for each vtx, each target buffer will have data written
+ * up to 'offset + stride[i]', that leaves maxvtxcnt as:
+ *
+ * buffer_size = (maxvtxcnt * stride[i]) + stride[i]
+ * maxvtxcnt = (buffer_size - stride[i]) / stride[i]
+ *
+ * but shader is actually doing a less-than (rather than less-than-
+ * equal) check, so we can drop the -stride[i].
+ *
+ * TODO is assumption about `offset + stride[i]` legit?
+ */
+ for (unsigned i = 0; i < so->num_targets; i++) {
+ struct pipe_stream_output_target *target = so->targets[i];
+ unsigned stride = info->stride[i] * 4; /* convert dwords->bytes */
+ if (target) {
+ uint32_t max = target->buffer_size / stride;
+ maxvtxcnt = MIN2(maxvtxcnt, max);
+ }
+ }
+
+ return maxvtxcnt;
+}
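+/* Worked example: with stride[i] = 4 dwords (16 bytes per vertex) and a
+ * 1024-byte target buffer, max = 1024 / 16 = 64 vertices; with multiple
+ * targets bound, maxvtxcnt is the minimum across all of them.
+ */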
+
+void
+ir3_emit_consts(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
+ const struct pipe_draw_info *info, uint32_t dirty)
+{
+ struct fd_context *ctx = fd_context(v->shader->pctx);
+
+ if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) {
+ struct fd_constbuf_stateobj *constbuf;
+ bool shader_dirty;
+
+ if (v->type == SHADER_VERTEX) {
+ constbuf = &ctx->constbuf[PIPE_SHADER_VERTEX];
+ shader_dirty = !!(ctx->prog.dirty & FD_SHADER_DIRTY_VP);
+ } else if (v->type == SHADER_FRAGMENT) {
+ constbuf = &ctx->constbuf[PIPE_SHADER_FRAGMENT];
+ shader_dirty = !!(ctx->prog.dirty & FD_SHADER_DIRTY_FP);
+ } else {
+ unreachable("bad shader type");
+ return;
+ }
+
+ emit_user_consts(v, ring, constbuf);
+ emit_ubos(v, ring, constbuf);
+ if (shader_dirty)
+ emit_immediates(v, ring);
+ }
+
+ /* emit driver params every time: */
+ /* TODO skip emit if shader doesn't use driver params to avoid WFI.. */
+ if (info && (v->type == SHADER_VERTEX)) {
+ uint32_t offset = v->first_driver_param + 4; /* driver params after UBOs */
+ if (v->constlen >= offset) {
+ uint32_t vertex_params[4] = {
+ [IR3_DP_VTXID_BASE] = info->indexed ?
+ info->index_bias : info->start,
+ [IR3_DP_VTXCNT_MAX] = max_tf_vtx(v),
+ };
+
+ fd_wfi(ctx, ring);
+ ctx->emit_const(ring, SHADER_VERTEX, offset * 4, 0,
+ ARRAY_SIZE(vertex_params), vertex_params, NULL);
+
+ /* if needed, emit stream-out buffer addresses: */
+ if (vertex_params[IR3_DP_VTXCNT_MAX] > 0) {
+ emit_tfbos(v, ring);
+ }
+ }
+ }
+}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
index 9f1b0769180..1bbbdbd224d 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -29,9 +29,22 @@
#ifndef IR3_SHADER_H_
#define IR3_SHADER_H_
+#include "pipe/p_state.h"
+
#include "ir3.h"
#include "disasm.h"
+/* driver param indices: */
+enum ir3_driver_param {
+ IR3_DP_VTXID_BASE = 0,
+ IR3_DP_VTXCNT_MAX = 1,
+};
+
+/* internal semantic used for passing vtxcnt to the vertex shader to
+ * implement transform feedback:
+ */
+#define IR3_SEMANTIC_VTXCNT (TGSI_SEMANTIC_COUNT + 0)
+
typedef uint16_t ir3_semantic; /* semantic name + index */
static inline ir3_semantic
ir3_semantic_name(uint8_t name, uint16_t index)
@@ -100,6 +113,9 @@ ir3_shader_key_equal(struct ir3_shader_key *a, struct ir3_shader_key *b)
struct ir3_shader_variant {
struct fd_bo *bo;
+ /* variant id (for debug) */
+ uint32_t id;
+
struct ir3_shader_key key;
struct ir3_info info;
@@ -192,26 +208,44 @@ struct ir3_shader_variant {
struct ir3_shader {
enum shader_t type;
+ /* shader id (for debug): */
+ uint32_t id;
+ uint32_t variant_count;
+
struct ir3_compiler *compiler;
struct pipe_context *pctx;
const struct tgsi_token *tokens;
+ struct pipe_stream_output_info stream_output;
struct ir3_shader_variant *variants;
-
- /* so far, only used for blit_prog shader.. values for
- * VPC_VARYING_PS_REPL[i].MODE
- */
- uint32_t vpsrepl[8];
};
void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id);
struct ir3_shader * ir3_shader_create(struct pipe_context *pctx,
- const struct tgsi_token *tokens, enum shader_t type);
+ const struct pipe_shader_state *cso, enum shader_t type);
void ir3_shader_destroy(struct ir3_shader *shader);
struct ir3_shader_variant * ir3_shader_variant(struct ir3_shader *shader,
struct ir3_shader_key key);
+void ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin);
+
+struct fd_ringbuffer;
+void ir3_emit_consts(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
+ const struct pipe_draw_info *info, uint32_t dirty);
+
+static inline const char *
+ir3_shader_stage(struct ir3_shader *shader)
+{
+ switch (shader->type) {
+ case SHADER_VERTEX: return "VERT";
+ case SHADER_FRAGMENT: return "FRAG";
+ case SHADER_COMPUTE: return "CL";
+ default:
+ unreachable("invalid type");
+ return NULL;
+ }
+}
/*
* Helper/util:
diff --git a/src/gallium/drivers/i915/i915_batchbuffer.h b/src/gallium/drivers/i915/i915_batchbuffer.h
index dcf63543219..6466fa594f9 100644
--- a/src/gallium/drivers/i915/i915_batchbuffer.h
+++ b/src/gallium/drivers/i915/i915_batchbuffer.h
@@ -33,20 +33,20 @@
struct i915_context;
-static INLINE size_t
+static inline size_t
i915_winsys_batchbuffer_space(struct i915_winsys_batchbuffer *batch)
{
return batch->size - (batch->ptr - batch->map);
}
-static INLINE boolean
+static inline boolean
i915_winsys_batchbuffer_check(struct i915_winsys_batchbuffer *batch,
size_t dwords)
{
return dwords * 4 <= i915_winsys_batchbuffer_space(batch);
}
-static INLINE void
+static inline void
i915_winsys_batchbuffer_dword_unchecked(struct i915_winsys_batchbuffer *batch,
unsigned dword)
{
@@ -54,7 +54,7 @@ i915_winsys_batchbuffer_dword_unchecked(struct i915_winsys_batchbuffer *batch,
batch->ptr += 4;
}
-static INLINE void
+static inline void
i915_winsys_batchbuffer_float(struct i915_winsys_batchbuffer *batch,
float f)
{
@@ -64,7 +64,7 @@ i915_winsys_batchbuffer_float(struct i915_winsys_batchbuffer *batch,
i915_winsys_batchbuffer_dword_unchecked(batch, uif.ui);
}
-static INLINE void
+static inline void
i915_winsys_batchbuffer_dword(struct i915_winsys_batchbuffer *batch,
unsigned dword)
{
@@ -72,7 +72,7 @@ i915_winsys_batchbuffer_dword(struct i915_winsys_batchbuffer *batch,
i915_winsys_batchbuffer_dword_unchecked(batch, dword);
}
-static INLINE void
+static inline void
i915_winsys_batchbuffer_write(struct i915_winsys_batchbuffer *batch,
void *data,
size_t size)
@@ -83,7 +83,7 @@ i915_winsys_batchbuffer_write(struct i915_winsys_batchbuffer *batch,
batch->ptr += size;
}
-static INLINE boolean
+static inline boolean
i915_winsys_validate_buffers(struct i915_winsys_batchbuffer *batch,
struct i915_winsys_buffer **buffers,
int num_of_buffers)
@@ -91,7 +91,7 @@ i915_winsys_validate_buffers(struct i915_winsys_batchbuffer *batch,
return batch->iws->validate_buffers(batch, buffers, num_of_buffers);
}
-static INLINE int
+static inline int
i915_winsys_batchbuffer_reloc(struct i915_winsys_batchbuffer *batch,
struct i915_winsys_buffer *buffer,
enum i915_winsys_buffer_usage usage,
diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h
index 40abf3c577f..c8c7d64f5cb 100644
--- a/src/gallium/drivers/i915/i915_context.h
+++ b/src/gallium/drivers/i915/i915_context.h
@@ -339,7 +339,7 @@ struct i915_context {
#define I915_DST_VARS 4
#define I915_DST_RECT 8
-static INLINE
+static inline
void i915_set_flush_dirty(struct i915_context *i915, unsigned flush)
{
i915->hardware_dirty |= I915_HW_FLUSH;
@@ -408,7 +408,7 @@ struct pipe_context *i915_create_context(struct pipe_screen *screen,
* Inline conversion functions. These are better-typed than the
* macros used previously:
*/
-static INLINE struct i915_context *
+static inline struct i915_context *
i915_context( struct pipe_context *pipe )
{
return (struct i915_context *)pipe;
diff --git a/src/gallium/drivers/i915/i915_debug.h b/src/gallium/drivers/i915/i915_debug.h
index 079882c811f..0f12a592ae8 100644
--- a/src/gallium/drivers/i915/i915_debug.h
+++ b/src/gallium/drivers/i915/i915_debug.h
@@ -48,13 +48,13 @@ struct i915_winsys_batchbuffer;
extern unsigned i915_debug;
#ifdef DEBUG
-static INLINE boolean
+static inline boolean
I915_DBG_ON(unsigned flags)
{
return i915_debug & flags;
}
-static INLINE void
+static inline void
I915_DBG(unsigned flags, const char *fmt, ...)
{
if (I915_DBG_ON(flags)) {
@@ -67,7 +67,7 @@ I915_DBG(unsigned flags, const char *fmt, ...)
}
#else
#define I915_DBG_ON(flags) (0)
-static INLINE void I915_DBG(unsigned flags, const char *fmt, ...) {}
+static inline void I915_DBG(unsigned flags, const char *fmt, ...) {}
#endif
void i915_debug_init(struct i915_screen *i915);
diff --git a/src/gallium/drivers/i915/i915_fpc.h b/src/gallium/drivers/i915/i915_fpc.h
index a4dbcb4d271..adc42542fea 100644
--- a/src/gallium/drivers/i915/i915_fpc.h
+++ b/src/gallium/drivers/i915/i915_fpc.h
@@ -136,7 +136,7 @@ struct i915_fp_compile {
/* One neat thing about the UREG representation:
*/
-static INLINE int
+static inline int
swizzle(int reg, uint x, uint y, uint z, uint w)
{
assert(x <= SRC_ONE);
diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c
index 38a33888166..456be9d92ca 100644
--- a/src/gallium/drivers/i915/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915/i915_fpc_translate.c
@@ -111,7 +111,7 @@ static const float cos_constants[4] = { 1.0,
/**
* component-wise negation of ureg
*/
-static INLINE int
+static inline int
negate(int reg, int x, int y, int z, int w)
{
/* Another neat thing about the UREG representation */
diff --git a/src/gallium/drivers/i915/i915_prim_emit.c b/src/gallium/drivers/i915/i915_prim_emit.c
index 248e21e02da..ea84efd1d17 100644
--- a/src/gallium/drivers/i915/i915_prim_emit.c
+++ b/src/gallium/drivers/i915/i915_prim_emit.c
@@ -53,7 +53,7 @@ struct setup_stage {
/**
* Basically a cast wrapper.
*/
-static INLINE struct setup_stage *setup_stage( struct draw_stage *stage )
+static inline struct setup_stage *setup_stage( struct draw_stage *stage )
{
return (struct setup_stage *)stage;
}
@@ -65,7 +65,7 @@ static INLINE struct setup_stage *setup_stage( struct draw_stage *stage )
* have a couple of slots at the beginning (1-dword header, 4-dword
* clip pos) that we ignore here.
*/
-static INLINE void
+static inline void
emit_hw_vertex( struct i915_context *i915,
const struct vertex_header *vertex)
{
@@ -124,7 +124,7 @@ emit_hw_vertex( struct i915_context *i915,
-static INLINE void
+static inline void
emit_prim( struct draw_stage *stage,
struct prim_header *prim,
unsigned hwprim,
diff --git a/src/gallium/drivers/i915/i915_prim_vbuf.c b/src/gallium/drivers/i915/i915_prim_vbuf.c
index d134dbb1620..8f61f151e0c 100644
--- a/src/gallium/drivers/i915/i915_prim_vbuf.c
+++ b/src/gallium/drivers/i915/i915_prim_vbuf.c
@@ -96,7 +96,7 @@ struct i915_vbuf_render {
/**
* Basically a cast wrapper.
*/
-static INLINE struct i915_vbuf_render *
+static inline struct i915_vbuf_render *
i915_vbuf_render(struct vbuf_render *render)
{
assert(render);
diff --git a/src/gallium/drivers/i915/i915_resource.h b/src/gallium/drivers/i915/i915_resource.h
index ef99cfb5d3c..77fe8b70f79 100644
--- a/src/gallium/drivers/i915/i915_resource.h
+++ b/src/gallium/drivers/i915/i915_resource.h
@@ -94,14 +94,14 @@ void i915_init_resource_functions(struct i915_context *i915);
extern struct u_resource_vtbl i915_buffer_vtbl;
extern struct u_resource_vtbl i915_texture_vtbl;
-static INLINE struct i915_texture *i915_texture(struct pipe_resource *resource)
+static inline struct i915_texture *i915_texture(struct pipe_resource *resource)
{
struct i915_texture *tex = (struct i915_texture *)resource;
assert(tex->b.vtbl == &i915_texture_vtbl);
return tex;
}
-static INLINE struct i915_buffer *i915_buffer(struct pipe_resource *resource)
+static inline struct i915_buffer *i915_buffer(struct pipe_resource *resource)
{
struct i915_buffer *tex = (struct i915_buffer *)resource;
assert(tex->b.vtbl == &i915_buffer_vtbl);
diff --git a/src/gallium/drivers/i915/i915_resource_texture.c b/src/gallium/drivers/i915/i915_resource_texture.c
index 8ef73d6f2c2..9a3279ccb75 100644
--- a/src/gallium/drivers/i915/i915_resource_texture.c
+++ b/src/gallium/drivers/i915/i915_resource_texture.c
@@ -89,25 +89,25 @@ static const int bottom_offsets[6] = {
[PIPE_TEX_FACE_NEG_Z] = 16 + 5 * 8,
};
-static INLINE unsigned
+static inline unsigned
align_nblocksx(enum pipe_format format, unsigned width, unsigned align_to)
{
return align(util_format_get_nblocksx(format, width), align_to);
}
-static INLINE unsigned
+static inline unsigned
align_nblocksy(enum pipe_format format, unsigned width, unsigned align_to)
{
return align(util_format_get_nblocksy(format, width), align_to);
}
-static INLINE unsigned
+static inline unsigned
get_pot_stride(enum pipe_format format, unsigned width)
{
return util_next_power_of_two(util_format_get_stride(format, width));
}
-static INLINE const char*
+static inline const char*
get_tiling_string(enum i915_winsys_buffer_tile tile)
{
switch(tile) {
diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
index 0590da07b9a..19a94a8e019 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -243,6 +243,10 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
return 0;
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
@@ -463,21 +467,15 @@ i915_fence_reference(struct pipe_screen *screen,
}
static boolean
-i915_fence_signalled(struct pipe_screen *screen,
- struct pipe_fence_handle *fence)
-{
- struct i915_screen *is = i915_screen(screen);
-
- return is->iws->fence_signalled(is->iws, fence) == 1;
-}
-
-static boolean
i915_fence_finish(struct pipe_screen *screen,
struct pipe_fence_handle *fence,
uint64_t timeout)
{
struct i915_screen *is = i915_screen(screen);
+ if (!timeout)
+ return is->iws->fence_signalled(is->iws, fence) == 1;
+
return is->iws->fence_finish(is->iws, fence) == 1;
}
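
/* The removed fence_signalled() is now folded into fence_finish(): a zero
 * timeout means "just poll".  A caller that used to poll would now do
 *
 *	boolean done = screen->fence_finish(screen, fence, 0);
 *
 * and pass PIPE_TIMEOUT_INFINITE (or any nonzero timeout here, since this
 * implementation ignores the actual value) to block until signalled.
 */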
@@ -565,7 +563,6 @@ i915_screen_create(struct i915_winsys *iws)
is->base.context_create = i915_create_context;
is->base.fence_reference = i915_fence_reference;
- is->base.fence_signalled = i915_fence_signalled;
is->base.fence_finish = i915_fence_finish;
i915_init_screen_resource_functions(is);
diff --git a/src/gallium/drivers/i915/i915_screen.h b/src/gallium/drivers/i915/i915_screen.h
index 99d3ffd3af9..3be941a1561 100644
--- a/src/gallium/drivers/i915/i915_screen.h
+++ b/src/gallium/drivers/i915/i915_screen.h
@@ -59,7 +59,7 @@ struct i915_screen
*/
-static INLINE struct i915_screen *
+static inline struct i915_screen *
i915_screen(struct pipe_screen *pscreen)
{
return (struct i915_screen *) pscreen;
diff --git a/src/gallium/drivers/i915/i915_state_dynamic.c b/src/gallium/drivers/i915/i915_state_dynamic.c
index 4050cd4ac44..1c29e8ae671 100644
--- a/src/gallium/drivers/i915/i915_state_dynamic.c
+++ b/src/gallium/drivers/i915/i915_state_dynamic.c
@@ -46,7 +46,7 @@
* (active) state every time a 4kb boundary is crossed.
*/
-static INLINE void set_dynamic(struct i915_context *i915,
+static inline void set_dynamic(struct i915_context *i915,
unsigned offset,
const unsigned state)
{
@@ -60,7 +60,7 @@ static INLINE void set_dynamic(struct i915_context *i915,
-static INLINE void set_dynamic_array(struct i915_context *i915,
+static inline void set_dynamic_array(struct i915_context *i915,
unsigned offset,
const unsigned *src,
unsigned dwords)
diff --git a/src/gallium/drivers/i915/i915_state_immediate.c b/src/gallium/drivers/i915/i915_state_immediate.c
index d244a349fce..c4a6cae1beb 100644
--- a/src/gallium/drivers/i915/i915_state_immediate.c
+++ b/src/gallium/drivers/i915/i915_state_immediate.c
@@ -39,7 +39,7 @@
/* Convenience function to check immediate state.
*/
-static INLINE void set_immediate(struct i915_context *i915,
+static inline void set_immediate(struct i915_context *i915,
unsigned offset,
const unsigned state)
{
diff --git a/src/gallium/drivers/i915/i915_state_inlines.h b/src/gallium/drivers/i915/i915_state_inlines.h
index d4c5ab69555..015ea32933b 100644
--- a/src/gallium/drivers/i915/i915_state_inlines.h
+++ b/src/gallium/drivers/i915/i915_state_inlines.h
@@ -34,7 +34,7 @@
#include "i915_reg.h"
-static INLINE unsigned
+static inline unsigned
i915_translate_compare_func(unsigned func)
{
switch (func) {
@@ -59,7 +59,7 @@ i915_translate_compare_func(unsigned func)
}
}
-static INLINE unsigned
+static inline unsigned
i915_translate_shadow_compare_func(unsigned func)
{
switch (func) {
@@ -84,7 +84,7 @@ i915_translate_shadow_compare_func(unsigned func)
}
}
-static INLINE unsigned
+static inline unsigned
i915_translate_stencil_op(unsigned op)
{
switch (op) {
@@ -109,7 +109,7 @@ i915_translate_stencil_op(unsigned op)
}
}
-static INLINE unsigned
+static inline unsigned
i915_translate_blend_factor(unsigned factor)
{
switch (factor) {
@@ -148,7 +148,7 @@ i915_translate_blend_factor(unsigned factor)
}
}
-static INLINE unsigned
+static inline unsigned
i915_translate_blend_func(unsigned mode)
{
switch (mode) {
@@ -168,7 +168,7 @@ i915_translate_blend_func(unsigned mode)
}
-static INLINE unsigned
+static inline unsigned
i915_translate_logic_op(unsigned opcode)
{
switch (opcode) {
@@ -211,7 +211,7 @@ i915_translate_logic_op(unsigned opcode)
-static INLINE boolean i915_validate_vertices( unsigned hw_prim, unsigned nr )
+static inline boolean i915_validate_vertices( unsigned hw_prim, unsigned nr )
{
boolean ok;
diff --git a/src/gallium/drivers/ilo/Makefile.am b/src/gallium/drivers/ilo/Makefile.am
index a8785a5e8c4..1f14153748e 100644
--- a/src/gallium/drivers/ilo/Makefile.am
+++ b/src/gallium/drivers/ilo/Makefile.am
@@ -21,8 +21,6 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
-AUTOMAKE_OPTIONS = subdir-objects
-
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources
index e1bbb9a0781..7a7db938f92 100644
--- a/src/gallium/drivers/ilo/Makefile.sources
+++ b/src/gallium/drivers/ilo/Makefile.sources
@@ -1,5 +1,4 @@
C_SOURCES := \
- core/ilo_buffer.h \
core/ilo_builder.c \
core/ilo_builder.h \
core/ilo_builder_3d.h \
@@ -43,6 +42,7 @@ C_SOURCES := \
core/ilo_state_viewport.h \
core/ilo_state_zs.c \
core/ilo_state_zs.h \
+ core/ilo_vma.h \
core/intel_winsys.h \
ilo_blit.c \
ilo_blit.h \
@@ -65,8 +65,6 @@ C_SOURCES := \
ilo_public.h \
ilo_query.c \
ilo_query.h \
- ilo_resource.c \
- ilo_resource.h \
ilo_render.c \
ilo_render.h \
ilo_render_gen.h \
@@ -76,6 +74,8 @@ C_SOURCES := \
ilo_render_gen8.c \
ilo_render_media.c \
ilo_render_surface.c \
+ ilo_resource.c \
+ ilo_resource.h \
ilo_screen.c \
ilo_screen.h \
ilo_shader.c \
diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h
index 6d9e3699125..5efe9da2d22 100644
--- a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h
+++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h
@@ -39,6 +39,7 @@
#include "ilo_state_shader.h"
#include "ilo_state_viewport.h"
#include "ilo_state_zs.h"
+#include "ilo_vma.h"
#include "ilo_builder.h"
#include "ilo_builder_3d_top.h"
@@ -674,9 +675,10 @@ gen6_3DSTATE_DEPTH_BUFFER(struct ilo_builder *builder,
dw[5] |= builder->mocs << GEN8_DEPTH_DW5_MOCS__SHIFT;
- if (zs->depth_bo) {
- ilo_builder_batch_reloc64(builder, pos + 2, zs->depth_bo,
- zs->depth[1], (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE);
+ if (zs->z_vma) {
+ ilo_builder_batch_reloc64(builder, pos + 2, zs->z_vma->bo,
+ zs->z_vma->bo_offset + zs->depth[1],
+ (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE);
}
} else {
dw[1] = zs->depth[0];
@@ -691,9 +693,10 @@ gen6_3DSTATE_DEPTH_BUFFER(struct ilo_builder *builder,
else
dw[6] |= builder->mocs << GEN6_DEPTH_DW6_MOCS__SHIFT;
- if (zs->depth_bo) {
- ilo_builder_batch_reloc(builder, pos + 2, zs->depth_bo,
- zs->depth[1], (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE);
+ if (zs->z_vma) {
+ ilo_builder_batch_reloc(builder, pos + 2, zs->z_vma->bo,
+ zs->z_vma->bo_offset + zs->depth[1],
+ (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE);
}
}
}
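
/* The bo -> vma conversion in this file follows one pattern: a VMA may sit
 * at an offset inside a larger bo, so every relocation now adds the VMA's
 * bo_offset (schematic, with packet_offset/flags standing in for the real
 * arguments):
 *
 *	ilo_builder_batch_reloc(builder, pos, vma->bo,
 *			vma->bo_offset + packet_offset, flags);
 */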
@@ -724,9 +727,10 @@ gen6_3DSTATE_STENCIL_BUFFER(struct ilo_builder *builder,
dw[1] |= builder->mocs << GEN8_STENCIL_DW1_MOCS__SHIFT;
- if (zs->stencil_bo) {
- ilo_builder_batch_reloc64(builder, pos + 2, zs->stencil_bo,
- zs->stencil[1], (zs->s_readonly) ? 0 : INTEL_RELOC_WRITE);
+ if (zs->s_vma) {
+ ilo_builder_batch_reloc64(builder, pos + 2, zs->s_vma->bo,
+ zs->s_vma->bo_offset + zs->stencil[1],
+ (zs->s_readonly) ? 0 : INTEL_RELOC_WRITE);
}
} else {
dw[1] = zs->stencil[0];
@@ -734,9 +738,10 @@ gen6_3DSTATE_STENCIL_BUFFER(struct ilo_builder *builder,
dw[1] |= builder->mocs << GEN6_STENCIL_DW1_MOCS__SHIFT;
- if (zs->stencil_bo) {
- ilo_builder_batch_reloc(builder, pos + 2, zs->stencil_bo,
- zs->stencil[1], (zs->s_readonly) ? 0 : INTEL_RELOC_WRITE);
+ if (zs->s_vma) {
+ ilo_builder_batch_reloc(builder, pos + 2, zs->s_vma->bo,
+ zs->s_vma->bo_offset + zs->stencil[1],
+ (zs->s_readonly) ? 0 : INTEL_RELOC_WRITE);
}
}
}
@@ -767,9 +772,10 @@ gen6_3DSTATE_HIER_DEPTH_BUFFER(struct ilo_builder *builder,
dw[1] |= builder->mocs << GEN8_HIZ_DW1_MOCS__SHIFT;
- if (zs->hiz_bo) {
- ilo_builder_batch_reloc64(builder, pos + 2, zs->hiz_bo,
- zs->hiz[1], (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE);
+ if (zs->hiz_vma) {
+ ilo_builder_batch_reloc64(builder, pos + 2, zs->hiz_vma->bo,
+ zs->hiz_vma->bo_offset + zs->hiz[1],
+ (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE);
}
} else {
dw[1] = zs->hiz[0];
@@ -777,9 +783,10 @@ gen6_3DSTATE_HIER_DEPTH_BUFFER(struct ilo_builder *builder,
dw[1] |= builder->mocs << GEN6_HIZ_DW1_MOCS__SHIFT;
- if (zs->hiz_bo) {
- ilo_builder_batch_reloc(builder, pos + 2, zs->hiz_bo,
- zs->hiz[1], (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE);
+ if (zs->hiz_vma) {
+ ilo_builder_batch_reloc(builder, pos + 2, zs->hiz_vma->bo,
+ zs->hiz_vma->bo_offset + zs->hiz[1],
+ (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE);
}
}
}
diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h
index 8d30095e6f6..6e94fb25f1f 100644
--- a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h
+++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h
@@ -39,6 +39,7 @@
#include "ilo_state_surface.h"
#include "ilo_state_urb.h"
#include "ilo_state_vf.h"
+#include "ilo_vma.h"
#include "ilo_builder.h"
static inline void
@@ -318,8 +319,10 @@ gen6_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder,
dw[3] = 0;
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
- if (b->need_bo)
- ilo_builder_batch_reloc64(builder, pos + 1, b->bo, b->vb[1], 0);
+ if (b->vma) {
+ ilo_builder_batch_reloc64(builder, pos + 1, b->vma->bo,
+ b->vma->bo_offset + b->vb[1], 0);
+ }
dw[3] |= b->vb[2];
} else {
@@ -331,9 +334,11 @@ gen6_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder,
dw[3] |= vf->user_instancing[elem][1];
}
- if (b->need_bo) {
- ilo_builder_batch_reloc(builder, pos + 1, b->bo, b->vb[1], 0);
- ilo_builder_batch_reloc(builder, pos + 2, b->bo, b->vb[2], 0);
+ if (b->vma) {
+ ilo_builder_batch_reloc(builder, pos + 1, b->vma->bo,
+ b->vma->bo_offset + b->vb[1], 0);
+ ilo_builder_batch_reloc(builder, pos + 2, b->vma->bo,
+ b->vma->bo_offset + b->vb[2], 0);
}
}
@@ -429,9 +434,11 @@ gen6_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder,
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = dw0;
- if (ib->need_bo) {
- ilo_builder_batch_reloc(builder, pos + 1, ib->bo, ib->ib[1], 0);
- ilo_builder_batch_reloc(builder, pos + 2, ib->bo, ib->ib[2], 0);
+ if (ib->vma) {
+ ilo_builder_batch_reloc(builder, pos + 1, ib->vma->bo,
+ ib->vma->bo_offset + ib->ib[1], 0);
+ ilo_builder_batch_reloc(builder, pos + 2, ib->vma->bo,
+ ib->vma->bo_offset + ib->ib[2], 0);
} else {
dw[1] = 0;
dw[2] = 0;
@@ -456,8 +463,9 @@ gen8_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder,
dw[1] = ib->ib[0] |
builder->mocs << GEN8_IB_DW1_MOCS__SHIFT;
- if (ib->need_bo) {
- ilo_builder_batch_reloc64(builder, pos + 2, ib->bo, ib->ib[1], 0);
+ if (ib->vma) {
+ ilo_builder_batch_reloc64(builder, pos + 2, ib->vma->bo,
+ ib->vma->bo_offset + ib->ib[1], 0);
} else {
dw[2] = 0;
dw[3] = 0;
@@ -801,11 +809,11 @@ gen7_3DSTATE_SO_BUFFER(struct ilo_builder *builder,
builder->mocs << GEN7_SO_BUF_DW1_MOCS__SHIFT |
sol->strides[buffer] << GEN7_SO_BUF_DW1_PITCH__SHIFT;
- if (sb->need_bo) {
- ilo_builder_batch_reloc(builder, pos + 2, sb->bo,
- sb->so_buf[0], INTEL_RELOC_WRITE);
- ilo_builder_batch_reloc(builder, pos + 3, sb->bo,
- sb->so_buf[1], INTEL_RELOC_WRITE);
+ if (sb->vma) {
+ ilo_builder_batch_reloc(builder, pos + 2, sb->vma->bo,
+ sb->vma->bo_offset + sb->so_buf[0], INTEL_RELOC_WRITE);
+ ilo_builder_batch_reloc(builder, pos + 3, sb->vma->bo,
+ sb->vma->bo_offset + sb->so_buf[1], INTEL_RELOC_WRITE);
} else {
dw[2] = 0;
dw[3] = 0;
@@ -832,9 +840,9 @@ gen8_3DSTATE_SO_BUFFER(struct ilo_builder *builder,
buffer << GEN7_SO_BUF_DW1_INDEX__SHIFT |
builder->mocs << GEN8_SO_BUF_DW1_MOCS__SHIFT;
- if (sb->need_bo) {
- ilo_builder_batch_reloc64(builder, pos + 2, sb->bo,
- sb->so_buf[1], INTEL_RELOC_WRITE);
+ if (sb->vma) {
+ ilo_builder_batch_reloc64(builder, pos + 2, sb->vma->bo,
+ sb->vma->bo_offset + sb->so_buf[1], INTEL_RELOC_WRITE);
} else {
dw[2] = 0;
dw[3] = 0;
@@ -842,9 +850,10 @@ gen8_3DSTATE_SO_BUFFER(struct ilo_builder *builder,
dw[4] = sb->so_buf[2];
- if (sb->need_write_offset_bo) {
- ilo_builder_batch_reloc64(builder, pos + 5, sb->write_offset_bo,
- sizeof(uint32_t) * buffer, INTEL_RELOC_WRITE);
+ if (sb->write_offset_vma) {
+ ilo_builder_batch_reloc64(builder, pos + 5, sb->write_offset_vma->bo,
+ sb->write_offset_vma->bo_offset + sizeof(uint32_t) * buffer,
+ INTEL_RELOC_WRITE);
} else {
dw[5] = 0;
dw[6] = 0;
@@ -1254,14 +1263,15 @@ gen6_SURFACE_STATE(struct ilo_builder *builder,
ILO_BUILDER_ITEM_SURFACE, state_align, state_len, &dw);
memcpy(dw, surf->surface, state_len << 2);
- if (surf->bo) {
+ if (surf->vma) {
const uint32_t mocs = (surf->scanout) ?
(GEN8_MOCS_MT_PTE | GEN8_MOCS_CT_L3) : builder->mocs;
dw[1] |= mocs << GEN8_SURFACE_DW1_MOCS__SHIFT;
- ilo_builder_surface_reloc64(builder, state_offset, 8, surf->bo,
- surf->surface[8], (surf->readonly) ? 0 : INTEL_RELOC_WRITE);
+ ilo_builder_surface_reloc64(builder, state_offset, 8, surf->vma->bo,
+ surf->vma->bo_offset + surf->surface[8],
+ (surf->readonly) ? 0 : INTEL_RELOC_WRITE);
}
} else {
state_align = 32;
@@ -1271,15 +1281,16 @@ gen6_SURFACE_STATE(struct ilo_builder *builder,
ILO_BUILDER_ITEM_SURFACE, state_align, state_len, &dw);
memcpy(dw, surf->surface, state_len << 2);
- if (surf->bo) {
+ if (surf->vma) {
/*
* For scanouts, we should not enable caching in LLC. Since we only
* enable that on Gen8+, we are fine here.
*/
dw[5] |= builder->mocs << GEN6_SURFACE_DW5_MOCS__SHIFT;
- ilo_builder_surface_reloc(builder, state_offset, 1, surf->bo,
- surf->surface[1], (surf->readonly) ? 0 : INTEL_RELOC_WRITE);
+ ilo_builder_surface_reloc(builder, state_offset, 1, surf->vma->bo,
+ surf->vma->bo_offset + surf->surface[1],
+ (surf->readonly) ? 0 : INTEL_RELOC_WRITE);
}
}
diff --git a/src/gallium/drivers/ilo/core/ilo_core.h b/src/gallium/drivers/ilo/core/ilo_core.h
index 0a7f7d9d3fe..da7db90a54b 100644
--- a/src/gallium/drivers/ilo/core/ilo_core.h
+++ b/src/gallium/drivers/ilo/core/ilo_core.h
@@ -29,15 +29,9 @@
#define ILO_CORE_H
#include "pipe/p_compiler.h"
-#include "pipe/p_defines.h"
-#include "pipe/p_format.h"
#include "util/u_debug.h"
-#include "util/list.h"
-#include "util/u_format.h"
-#include "util/u_inlines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
-#include "util/u_pointer.h"
#endif /* ILO_CORE_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_image.c b/src/gallium/drivers/ilo/core/ilo_image.c
index 0d837d8a9d5..fa547ac5c36 100644
--- a/src/gallium/drivers/ilo/core/ilo_image.c
+++ b/src/gallium/drivers/ilo/core/ilo_image.c
@@ -40,269 +40,356 @@ enum {
IMAGE_TILING_W)
};
-struct ilo_image_params {
- const struct ilo_dev *dev;
- const struct pipe_resource *templ;
- unsigned valid_tilings;
+struct ilo_image_layout {
+ enum ilo_image_walk_type walk;
+ bool interleaved_samples;
- bool compressed;
+ uint8_t valid_tilings;
+ enum gen_surface_tiling tiling;
- unsigned h0, h1;
- unsigned max_x, max_y;
+ enum ilo_image_aux_type aux;
+
+ int align_i;
+ int align_j;
+
+ struct ilo_image_lod *lods;
+ int walk_layer_h0;
+ int walk_layer_h1;
+ int walk_layer_height;
+ int monolithic_width;
+ int monolithic_height;
};
-static void
-img_get_slice_size(const struct ilo_image *img,
- const struct ilo_image_params *params,
- unsigned level, unsigned *width, unsigned *height)
+static enum ilo_image_walk_type
+image_get_gen6_walk(const struct ilo_dev *dev,
+ const struct ilo_image_info *info)
{
- const struct pipe_resource *templ = params->templ;
- unsigned w, h;
+ ILO_DEV_ASSERT(dev, 6, 6);
- w = u_minify(img->width0, level);
- h = u_minify(img->height0, level);
+ /* TODO we want LODs to be page-aligned */
+ if (info->type == GEN6_SURFTYPE_3D)
+ return ILO_IMAGE_WALK_3D;
/*
- * From the Sandy Bridge PRM, volume 1 part 1, page 114:
+ * From the Sandy Bridge PRM, volume 1 part 1, page 115:
*
- * "The dimensions of the mip maps are first determined by applying the
- * sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
- * if necessary, they are padded out to compression block boundaries."
+ * "The separate stencil buffer does not support mip mapping, thus the
+ * storage for LODs other than LOD 0 is not needed. The following
+ * QPitch equation applies only to the separate stencil buffer:
+ *
+ * QPitch = h_0"
+ *
+ * Use ILO_IMAGE_WALK_LOD and manually offset to the (page-aligned) levels
+ * when bound.
*/
- w = align(w, img->block_width);
- h = align(h, img->block_height);
+ if (info->bind_zs && info->format == GEN6_FORMAT_R8_UINT)
+ return ILO_IMAGE_WALK_LOD;
+
+ /* compact spacing is not supported otherwise */
+ return ILO_IMAGE_WALK_LAYER;
+}
+
+static enum ilo_image_walk_type
+image_get_gen7_walk(const struct ilo_dev *dev,
+ const struct ilo_image_info *info)
+{
+ ILO_DEV_ASSERT(dev, 7, 8);
+
+ if (info->type == GEN6_SURFTYPE_3D)
+ return ILO_IMAGE_WALK_3D;
/*
- * From the Sandy Bridge PRM, volume 1 part 1, page 111:
- *
- * "If the surface is multisampled (4x), these values must be adjusted
- * as follows before proceeding:
+ * From the Ivy Bridge PRM, volume 1 part 1, page 111:
*
- * W_L = ceiling(W_L / 2) * 4
- * H_L = ceiling(H_L / 2) * 4"
+ * "note that the depth buffer and stencil buffer have an implied value
+ * of ARYSPC_FULL"
*
- * From the Ivy Bridge PRM, volume 1 part 1, page 108:
+ * From the Ivy Bridge PRM, volume 4 part 1, page 66:
*
- * "If the surface is multisampled and it is a depth or stencil surface
- * or Multisampled Surface StorageFormat in SURFACE_STATE is
- * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
- * proceeding:
+ * "If Multisampled Surface Storage Format is MSFMT_MSS and Number of
+ * Multisamples is not MULTISAMPLECOUNT_1, this field (Surface Array
+ * Spacing) must be set to ARYSPC_LOD0."
+ */
+ if (info->sample_count > 1)
+ assert(info->level_count == 1);
+ return (info->bind_zs || info->level_count > 1) ?
+ ILO_IMAGE_WALK_LAYER : ILO_IMAGE_WALK_LOD;
+}
+
+static bool
+image_get_gen6_interleaved_samples(const struct ilo_dev *dev,
+ const struct ilo_image_info *info)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /*
+ * Gen6 supports only interleaved samples. It is not explicitly stated,
+ * but on Gen7+, render targets are expected to be UMS/CMS (samples
+ * non-interleaved) and depth/stencil buffers are expected to be IMS
+ * (samples interleaved).
*
- * #samples W_L = H_L =
- * 2 ceiling(W_L / 2) * 4 HL [no adjustment]
- * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
- * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
- * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
+ * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
+ */
+ return (ilo_dev_gen(dev) == ILO_GEN(6) || info->bind_zs);
+}
+
+static uint8_t
+image_get_gen6_valid_tilings(const struct ilo_dev *dev,
+ const struct ilo_image_info *info)
+{
+ uint8_t valid_tilings = IMAGE_TILING_ALL;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ if (info->valid_tilings)
+ valid_tilings &= info->valid_tilings;
+
+ /*
+ * From the Sandy Bridge PRM, volume 1 part 2, page 32:
*
- * For interleaved samples (4x), where pixels
+ * "Display/Overlay Y-Major not supported.
+ * X-Major required for Async Flips"
+ */
+ if (unlikely(info->bind_scanout))
+ valid_tilings &= IMAGE_TILING_X;
+
+ /*
+ * From the Sandy Bridge PRM, volume 3 part 2, page 158:
*
- * (x, y ) (x+1, y )
- * (x, y+1) (x+1, y+1)
+ * "The cursor surface address must be 4K byte aligned. The cursor must
+ * be in linear memory, it cannot be tiled."
+ */
+ if (unlikely(info->bind_cursor))
+ valid_tilings &= IMAGE_TILING_NONE;
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 318:
*
- * would be is occupied by
+ * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
+ * Depth Buffer is not supported."
*
- * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
- * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
- * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
- * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
+ * "The Depth Buffer, if tiled, must use Y-Major tiling."
*
- * Thus the need to
+ * From the Sandy Bridge PRM, volume 1 part 2, page 22:
*
- * w = align(w, 2) * 2;
- * y = align(y, 2) * 2;
+ * "W-Major Tile Format is used for separate stencil."
*/
- if (img->interleaved_samples) {
- switch (templ->nr_samples) {
- case 0:
- case 1:
- break;
- case 2:
- w = align(w, 2) * 2;
- break;
- case 4:
- w = align(w, 2) * 2;
- h = align(h, 2) * 2;
- break;
- case 8:
- w = align(w, 2) * 4;
- h = align(h, 2) * 2;
- break;
- case 16:
- w = align(w, 2) * 4;
- h = align(h, 2) * 4;
- break;
- default:
- assert(!"unsupported sample count");
- break;
- }
+ if (info->bind_zs) {
+ if (info->format == GEN6_FORMAT_R8_UINT)
+ valid_tilings &= IMAGE_TILING_W;
+ else
+ valid_tilings &= IMAGE_TILING_Y;
}
- /*
- * From the Ivy Bridge PRM, volume 1 part 1, page 108:
- *
- * "For separate stencil buffer, the width must be mutiplied by 2 and
- * height divided by 2..."
- *
- * To make things easier (for transfer), we will just double the stencil
- * stride in 3DSTATE_STENCIL_BUFFER.
- */
- w = align(w, img->align_i);
- h = align(h, img->align_j);
+ if (info->bind_surface_sampler ||
+ info->bind_surface_dp_render ||
+ info->bind_surface_dp_typed) {
+ /*
+ * From the Haswell PRM, volume 2d, page 233:
+ *
+ * "If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
+ * (Tiled Surface) must be TRUE."
+ */
+ if (info->sample_count > 1)
+ valid_tilings &= ~IMAGE_TILING_NONE;
- *width = w;
- *height = h;
-}
+ if (ilo_dev_gen(dev) < ILO_GEN(8))
+ valid_tilings &= ~IMAGE_TILING_W;
+ }
-static unsigned
-img_get_num_layers(const struct ilo_image *img,
- const struct ilo_image_params *params)
-{
- const struct pipe_resource *templ = params->templ;
- unsigned num_layers = templ->array_size;
+ if (info->bind_surface_dp_render) {
+ /*
+ * From the Sandy Bridge PRM, volume 1 part 2, page 32:
+ *
+ * "NOTE: 128BPE Format Color buffer ( render target ) MUST be
+ * either TileX or Linear."
+ *
+ * From the Haswell PRM, volume 5, page 32:
+ *
+ * "NOTE: 128 BPP format color buffer (render target) supports
+ * Linear, TiledX and TiledY."
+ */
+ if (ilo_dev_gen(dev) < ILO_GEN(7.5) && info->block_size == 16)
+ valid_tilings &= ~IMAGE_TILING_Y;
- /* samples of the same index are stored in a layer */
- if (templ->nr_samples > 1 && !img->interleaved_samples)
- num_layers *= templ->nr_samples;
+ /*
+ * From the Ivy Bridge PRM, volume 4 part 1, page 63:
+ *
+ * "This field (Surface Vertical Aligment) must be set to VALIGN_4
+ * for all tiled Y Render Target surfaces."
+ *
+ * "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
+ *
+ * R32G32B32_FLOAT is not renderable and we only need an assert() here.
+ */
+ if (ilo_dev_gen(dev) >= ILO_GEN(7) && ilo_dev_gen(dev) <= ILO_GEN(7.5))
+ assert(info->format != GEN6_FORMAT_R32G32B32_FLOAT);
+ }
- return num_layers;
+ return valid_tilings;
}
-static void
-img_init_layer_height(struct ilo_image *img,
- struct ilo_image_params *params)
+static uint64_t
+image_get_gen6_estimated_size(const struct ilo_dev *dev,
+ const struct ilo_image_info *info)
{
- const struct pipe_resource *templ = params->templ;
- unsigned num_layers;
+ /* padding not considered */
+ const uint64_t slice_size = info->width * info->height *
+ info->block_size / (info->block_width * info->block_height);
+ const uint64_t slice_count =
+ info->depth * info->array_size * info->sample_count;
+ const uint64_t estimated_size = slice_size * slice_count;
- if (img->walk != ILO_IMAGE_WALK_LAYER)
- return;
+ ILO_DEV_ASSERT(dev, 6, 8);
- num_layers = img_get_num_layers(img, params);
- if (num_layers <= 1)
- return;
+ if (info->level_count == 1)
+ return estimated_size;
+ else
+ return estimated_size * 4 / 3;
+}
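+/* The 4/3 factor is the geometric-series bound on a full mip chain: each
+ * level is roughly a quarter of the one below it, so the total is at most
+ * estimated_size * (1 + 1/4 + 1/16 + ...) = estimated_size * 4/3.
+ */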
+
+static enum gen_surface_tiling
+image_get_gen6_tiling(const struct ilo_dev *dev,
+ const struct ilo_image_info *info,
+ uint8_t valid_tilings)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ switch (valid_tilings) {
+ case IMAGE_TILING_NONE:
+ return GEN6_TILING_NONE;
+ case IMAGE_TILING_X:
+ return GEN6_TILING_X;
+ case IMAGE_TILING_Y:
+ return GEN6_TILING_Y;
+ case IMAGE_TILING_W:
+ return GEN8_TILING_W;
+ default:
+ break;
+ }
/*
- * From the Sandy Bridge PRM, volume 1 part 1, page 115:
- *
- * "The following equation is used for surface formats other than
- * compressed textures:
- *
- * QPitch = (h0 + h1 + 11j)"
- *
- * "The equation for compressed textures (BC* and FXT1 surface formats)
- * follows:
- *
- * QPitch = (h0 + h1 + 11j) / 4"
- *
- * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
- * value calculated in the equation above, for every other odd Surface
- * Height starting from 1 i.e. 1,5,9,13"
- *
- * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
+ * X-tiling has the property that vertically adjacent pixels are usually
+ * in the same page.  When the image size is less than a page, when the
+ * image height is 1, or when the image is not accessed in blocks, there
+ * is no reason to tile.
*
- * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
- * buffer and stencil buffer have an implied value of ARYSPC_FULL):
- *
- * QPitch = (h0 + h1 + 12j)
- * QPitch = (h0 + h1 + 12j) / 4 (compressed)
- *
- * (There are many typos or missing words here...)"
- *
- * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
- * the base address. The PRM divides QPitch by 4 for compressed formats
- * because the block height for those formats are 4, and it wants QPitch to
- * mean the number of memory rows, as opposed to texel rows, between
- * slices. Since we use texel rows everywhere, we do not need to divide
- * QPitch by 4.
+ * Y-tiling is similar, where vertically adjacent pixels are usually in the
+ * same cacheline.
*/
- img->walk_layer_height = params->h0 + params->h1 +
- ((ilo_dev_gen(params->dev) >= ILO_GEN(7)) ? 12 : 11) * img->align_j;
+ if (valid_tilings & IMAGE_TILING_NONE) {
+ const uint64_t estimated_size =
+ image_get_gen6_estimated_size(dev, info);
- if (ilo_dev_gen(params->dev) == ILO_GEN(6) && templ->nr_samples > 1 &&
- img->height0 % 4 == 1)
- img->walk_layer_height += 4;
+ if (info->height == 1 || !(info->bind_surface_sampler ||
+ info->bind_surface_dp_render ||
+ info->bind_surface_dp_typed))
+ return GEN6_TILING_NONE;
+
+ if (estimated_size <= 64 ||
+ estimated_size > info->prefer_linear_threshold)
+ return GEN6_TILING_NONE;
+
+ if (estimated_size <= 2048)
+ valid_tilings &= ~IMAGE_TILING_X;
+ }
- params->max_y += img->walk_layer_height * (num_layers - 1);
+ return (valid_tilings & IMAGE_TILING_Y) ? GEN6_TILING_Y :
+ (valid_tilings & IMAGE_TILING_X) ? GEN6_TILING_X :
+ GEN6_TILING_NONE;
}
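
/* Worked example of the heuristic above: a 16x16 RGBA8 sampler surface has
 * an estimated size of 16 * 16 * 4 = 1024 bytes; that is > 64 and <= 2048,
 * so (assuming it is under prefer_linear_threshold) X-tiling is masked out
 * and Y-tiling wins if still valid.  A 4x4 RGBA8 surface (64 bytes) stays
 * linear.
 */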
-static void
-img_init_lods(struct ilo_image *img,
- struct ilo_image_params *params)
+static bool
+image_get_gen6_hiz_enable(const struct ilo_dev *dev,
+ const struct ilo_image_info *info)
{
- const struct pipe_resource *templ = params->templ;
- unsigned cur_x, cur_y;
- unsigned lv;
+ ILO_DEV_ASSERT(dev, 6, 8);
- cur_x = 0;
- cur_y = 0;
- for (lv = 0; lv <= templ->last_level; lv++) {
- unsigned lod_w, lod_h;
+ /* depth buffer? */
+ if (!info->bind_zs ||
+ info->format == GEN6_FORMAT_R8_UINT ||
+ info->interleaved_stencil)
+ return false;
- img_get_slice_size(img, params, lv, &lod_w, &lod_h);
+ /* we want to be able to force 8x4 alignments */
+ if (info->type == GEN6_SURFTYPE_1D)
+ return false;
- img->lods[lv].x = cur_x;
- img->lods[lv].y = cur_y;
- img->lods[lv].slice_width = lod_w;
- img->lods[lv].slice_height = lod_h;
+ if (info->aux_disable)
+ return false;
- switch (img->walk) {
- case ILO_IMAGE_WALK_LAYER:
- /* MIPLAYOUT_BELOW */
- if (lv == 1)
- cur_x += lod_w;
- else
- cur_y += lod_h;
- break;
- case ILO_IMAGE_WALK_LOD:
- lod_h *= img_get_num_layers(img, params);
- if (lv == 1)
- cur_x += lod_w;
- else
- cur_y += lod_h;
+ if (ilo_debug & ILO_DEBUG_NOHIZ)
+ return false;
- /* every LOD begins at tile boundaries */
- if (templ->last_level > 0) {
- assert(img->format == PIPE_FORMAT_S8_UINT);
- cur_x = align(cur_x, 64);
- cur_y = align(cur_y, 64);
- }
- break;
- case ILO_IMAGE_WALK_3D:
- {
- const unsigned num_slices = u_minify(templ->depth0, lv);
- const unsigned num_slices_per_row = 1 << lv;
- const unsigned num_rows =
- (num_slices + num_slices_per_row - 1) / num_slices_per_row;
+ return true;
+}
- lod_w *= num_slices_per_row;
- lod_h *= num_rows;
+static bool
+image_get_gen7_mcs_enable(const struct ilo_dev *dev,
+ const struct ilo_image_info *info,
+ enum gen_surface_tiling tiling)
+{
+ ILO_DEV_ASSERT(dev, 7, 8);
- cur_y += lod_h;
- }
- break;
- }
+ if (!info->bind_surface_sampler && !info->bind_surface_dp_render)
+ return false;
- if (params->max_x < img->lods[lv].x + lod_w)
- params->max_x = img->lods[lv].x + lod_w;
- if (params->max_y < img->lods[lv].y + lod_h)
- params->max_y = img->lods[lv].y + lod_h;
+ /*
+ * From the Ivy Bridge PRM, volume 4 part 1, page 77:
+ *
+ * "For Render Target and Sampling Engine Surfaces:If the surface is
+ * multisampled (Number of Multisamples any value other than
+ * MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
+ *
+ * "This field must be set to 0 for all SINT MSRTs when all RT channels
+ * are not written"
+ */
+ if (info->sample_count > 1) {
+ if (ilo_dev_gen(dev) < ILO_GEN(8))
+ assert(!info->is_integer);
+ return true;
}
- if (img->walk == ILO_IMAGE_WALK_LAYER) {
- params->h0 = img->lods[0].slice_height;
+ if (info->aux_disable)
+ return false;
- if (templ->last_level > 0)
- params->h1 = img->lods[1].slice_height;
- else
- img_get_slice_size(img, params, 1, &cur_x, &params->h1);
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 326:
+ *
+ * "When MCS is buffer is used for color clear of non-multisampler
+ * render target, the following restrictions apply.
+ * - Support is limited to tiled render targets.
+ * - Support is for non-mip-mapped and non-array surface types only.
+ * - Clear is supported only on the full RT; i.e., no partial clear or
+ * overlapping clears.
+ * - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
+ * 64bpp and 128bpp.
+ * ..."
+ *
+ * How about SURFTYPE_3D?
+ */
+ if (!info->bind_surface_dp_render ||
+ tiling == GEN6_TILING_NONE ||
+ info->level_count > 1 ||
+ info->array_size > 1)
+ return false;
+
+ switch (info->block_size) {
+ case 4:
+ case 8:
+ case 16:
+ return true;
+ default:
+ return false;
}
}
static void
-img_init_alignments(struct ilo_image *img,
- const struct ilo_image_params *params)
+image_get_gen6_alignments(const struct ilo_dev *dev,
+ const struct ilo_image_info *info,
+ int *align_i, int *align_j)
{
- const struct pipe_resource *templ = params->templ;
+ ILO_DEV_ASSERT(dev, 6, 6);
/*
* From the Sandy Bridge PRM, volume 1 part 1, page 113:
@@ -335,13 +422,33 @@ img_init_alignments(struct ilo_image *img,
*
* align_i align_j
* compressed formats block width block height
- * PIPE_FORMAT_S8_UINT 4 2
+ * GEN6_FORMAT_R8_UINT 4 2
* other depth/stencil formats 4 4
* 4x multisampled 4 4
* bpp 96 4 2
* others 4 2 or 4
*/
+ *align_i = (info->compressed) ? info->block_width : 4;
+ if (info->compressed) {
+ *align_j = info->block_height;
+ } else if (info->bind_zs) {
+ *align_j = (info->format == GEN6_FORMAT_R8_UINT) ? 2 : 4;
+ } else {
+ *align_j = (info->sample_count > 1 || info->block_size != 12) ? 4 : 2;
+ }
+}
+
+static void
+image_get_gen7_alignments(const struct ilo_dev *dev,
+ const struct ilo_image_info *info,
+ enum gen_surface_tiling tiling,
+ int *align_i, int *align_j)
+{
+ int i, j;
+
+ ILO_DEV_ASSERT(dev, 7, 8);
+
/*
* From the Ivy Bridge PRM, volume 1 part 1, page 110:
*
@@ -383,465 +490,301 @@ img_init_alignments(struct ilo_image *img,
*
* align_i align_j
* compressed formats block width block height
- * PIPE_FORMAT_Z16_UNORM 8 4
- * PIPE_FORMAT_S8_UINT 8 8
+ * GEN6_FORMAT_R16_UNORM 8 4
+ * GEN6_FORMAT_R8_UINT 8 8
* other depth/stencil formats 4 4
* 2x or 4x multisampled 4 or 8 4
* tiled Y 4 or 8 4 (if rt)
- * PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2
+ * GEN6_FORMAT_R32G32B32_FLOAT 4 or 8 2
* others 4 or 8 2 or 4
*/
-
- if (params->compressed) {
- /* this happens to be the case */
- img->align_i = img->block_width;
- img->align_j = img->block_height;
- } else if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
- if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) {
- switch (img->format) {
- case PIPE_FORMAT_Z16_UNORM:
- img->align_i = 8;
- img->align_j = 4;
- break;
- case PIPE_FORMAT_S8_UINT:
- img->align_i = 8;
- img->align_j = 8;
- break;
- default:
- img->align_i = 4;
- img->align_j = 4;
- break;
- }
- } else {
- switch (img->format) {
- case PIPE_FORMAT_S8_UINT:
- img->align_i = 4;
- img->align_j = 2;
- break;
- default:
- img->align_i = 4;
- img->align_j = 4;
- break;
- }
+ if (info->compressed) {
+ i = info->block_width;
+ j = info->block_height;
+ } else if (info->bind_zs) {
+ switch (info->format) {
+ case GEN6_FORMAT_R16_UNORM:
+ i = 8;
+ j = 4;
+ break;
+ case GEN6_FORMAT_R8_UINT:
+ i = 8;
+ j = 8;
+ break;
+ default:
+ i = 4;
+ j = 4;
+ break;
}
} else {
const bool valign_4 =
- (templ->nr_samples > 1) ||
- (ilo_dev_gen(params->dev) >= ILO_GEN(8)) ||
- (ilo_dev_gen(params->dev) >= ILO_GEN(7) &&
- img->tiling == GEN6_TILING_Y &&
- (templ->bind & PIPE_BIND_RENDER_TARGET));
-
- if (ilo_dev_gen(params->dev) >= ILO_GEN(7) &&
- ilo_dev_gen(params->dev) <= ILO_GEN(7.5) && valign_4)
- assert(img->format != PIPE_FORMAT_R32G32B32_FLOAT);
-
- img->align_i = 4;
- img->align_j = (valign_4) ? 4 : 2;
- }
+ (info->sample_count > 1 || ilo_dev_gen(dev) >= ILO_GEN(8) ||
+ (tiling == GEN6_TILING_Y && info->bind_surface_dp_render));
- /*
- * the fact that align i and j are multiples of block width and height
- * respectively is what makes the size of the bo a multiple of the block
- * size, slices start at block boundaries, and many of the computations
- * work.
- */
- assert(img->align_i % img->block_width == 0);
- assert(img->align_j % img->block_height == 0);
+ if (ilo_dev_gen(dev) < ILO_GEN(8) && valign_4)
+ assert(info->format != GEN6_FORMAT_R32G32B32_FLOAT);
- /* make sure align() works */
- assert(util_is_power_of_two(img->align_i) &&
- util_is_power_of_two(img->align_j));
- assert(util_is_power_of_two(img->block_width) &&
- util_is_power_of_two(img->block_height));
+ i = 4;
+ j = (valign_4) ? 4 : 2;
+ }
+
+ *align_i = i;
+ *align_j = j;
}
-static void
-img_init_tiling(struct ilo_image *img,
- const struct ilo_image_params *params)
+static bool
+image_init_gen6_hardware_layout(const struct ilo_dev *dev,
+ const struct ilo_image_info *info,
+ struct ilo_image_layout *layout)
{
- const struct pipe_resource *templ = params->templ;
- unsigned preferred_tilings = params->valid_tilings;
-
- /* no fencing nor BLT support */
- if (preferred_tilings & ~IMAGE_TILING_W)
- preferred_tilings &= ~IMAGE_TILING_W;
-
- if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) {
- /*
- * heuristically set a minimum width/height for enabling tiling
- */
- if (img->width0 < 64 && (preferred_tilings & ~IMAGE_TILING_X))
- preferred_tilings &= ~IMAGE_TILING_X;
-
- if ((img->width0 < 32 || img->height0 < 16) &&
- (img->width0 < 16 || img->height0 < 32) &&
- (preferred_tilings & ~IMAGE_TILING_Y))
- preferred_tilings &= ~IMAGE_TILING_Y;
- } else {
- /* force linear if we are not sure where the texture is bound to */
- if (preferred_tilings & IMAGE_TILING_NONE)
- preferred_tilings &= IMAGE_TILING_NONE;
- }
+ ILO_DEV_ASSERT(dev, 6, 8);
- /* prefer tiled over linear */
- if (preferred_tilings & IMAGE_TILING_Y)
- img->tiling = GEN6_TILING_Y;
- else if (preferred_tilings & IMAGE_TILING_X)
- img->tiling = GEN6_TILING_X;
- else if (preferred_tilings & IMAGE_TILING_W)
- img->tiling = GEN8_TILING_W;
+ if (ilo_dev_gen(dev) >= ILO_GEN(7))
+ layout->walk = image_get_gen7_walk(dev, info);
else
- img->tiling = GEN6_TILING_NONE;
-}
+ layout->walk = image_get_gen6_walk(dev, info);
-static void
-img_init_walk_gen7(struct ilo_image *img,
- const struct ilo_image_params *params)
-{
- const struct pipe_resource *templ = params->templ;
+ layout->interleaved_samples =
+ image_get_gen6_interleaved_samples(dev, info);
- /*
- * It is not explicitly states, but render targets are expected to be
- * UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected
- * to be IMS (samples interleaved).
- *
- * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
- */
- if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
- /*
- * From the Ivy Bridge PRM, volume 1 part 1, page 111:
- *
- * "note that the depth buffer and stencil buffer have an implied
- * value of ARYSPC_FULL"
- */
- img->walk = (templ->target == PIPE_TEXTURE_3D) ?
- ILO_IMAGE_WALK_3D : ILO_IMAGE_WALK_LAYER;
+ layout->valid_tilings = image_get_gen6_valid_tilings(dev, info);
+ if (!layout->valid_tilings)
+ return false;
- img->interleaved_samples = true;
- } else {
- /*
- * From the Ivy Bridge PRM, volume 4 part 1, page 66:
- *
- * "If Multisampled Surface Storage Format is MSFMT_MSS and Number
- * of Multisamples is not MULTISAMPLECOUNT_1, this field (Surface
- * Array Spacing) must be set to ARYSPC_LOD0."
- *
- * As multisampled resources are not mipmapped, we never use
- * ARYSPC_FULL for them.
- */
- if (templ->nr_samples > 1)
- assert(templ->last_level == 0);
+ layout->tiling = image_get_gen6_tiling(dev, info, layout->valid_tilings);
- img->walk =
- (templ->target == PIPE_TEXTURE_3D) ? ILO_IMAGE_WALK_3D :
- (templ->last_level > 0) ? ILO_IMAGE_WALK_LAYER :
- ILO_IMAGE_WALK_LOD;
+ if (image_get_gen6_hiz_enable(dev, info))
+ layout->aux = ILO_IMAGE_AUX_HIZ;
+ else if (ilo_dev_gen(dev) >= ILO_GEN(7) &&
+ image_get_gen7_mcs_enable(dev, info, layout->tiling))
+ layout->aux = ILO_IMAGE_AUX_MCS;
+ else
+ layout->aux = ILO_IMAGE_AUX_NONE;
- img->interleaved_samples = false;
+ if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+ image_get_gen7_alignments(dev, info, layout->tiling,
+ &layout->align_i, &layout->align_j);
+ } else {
+ image_get_gen6_alignments(dev, info,
+ &layout->align_i, &layout->align_j);
}
+
+ return true;
}
-static void
-img_init_walk_gen6(struct ilo_image *img,
- const struct ilo_image_params *params)
+static bool
+image_init_gen6_transfer_layout(const struct ilo_dev *dev,
+ const struct ilo_image_info *info,
+ struct ilo_image_layout *layout)
{
- /*
- * From the Sandy Bridge PRM, volume 1 part 1, page 115:
- *
- * "The separate stencil buffer does not support mip mapping, thus the
- * storage for LODs other than LOD 0 is not needed. The following
- * QPitch equation applies only to the separate stencil buffer:
- *
- * QPitch = h_0"
- *
- * GEN6 does not support compact spacing otherwise.
- */
- img->walk =
- (params->templ->target == PIPE_TEXTURE_3D) ? ILO_IMAGE_WALK_3D :
- (img->format == PIPE_FORMAT_S8_UINT) ? ILO_IMAGE_WALK_LOD :
- ILO_IMAGE_WALK_LAYER;
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /* we can define our own layout to save space */
+ layout->walk = ILO_IMAGE_WALK_LOD;
+ layout->interleaved_samples = false;
+ layout->valid_tilings = IMAGE_TILING_NONE;
+ layout->tiling = GEN6_TILING_NONE;
+ layout->aux = ILO_IMAGE_AUX_NONE;
+ layout->align_i = info->block_width;
+ layout->align_j = info->block_height;
- /* GEN6 supports only interleaved samples */
- img->interleaved_samples = true;
+ return true;
}
static void
-img_init_walk(struct ilo_image *img,
- const struct ilo_image_params *params)
+image_get_gen6_slice_size(const struct ilo_dev *dev,
+ const struct ilo_image_info *info,
+ const struct ilo_image_layout *layout,
+ uint8_t level,
+ int *width, int *height)
{
- if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
- img_init_walk_gen7(img, params);
- else
- img_init_walk_gen6(img, params);
-}
+ int w, h;
-static unsigned
-img_get_valid_tilings(const struct ilo_image *img,
- const struct ilo_image_params *params)
-{
- const struct pipe_resource *templ = params->templ;
- const enum pipe_format format = img->format;
- unsigned valid_tilings = params->valid_tilings;
+ ILO_DEV_ASSERT(dev, 6, 8);
- /*
- * From the Sandy Bridge PRM, volume 1 part 2, page 32:
- *
- * "Display/Overlay Y-Major not supported.
- * X-Major required for Async Flips"
- */
- if (unlikely(templ->bind & PIPE_BIND_SCANOUT))
- valid_tilings &= IMAGE_TILING_X;
+ w = u_minify(info->width, level);
+ h = u_minify(info->height, level);
/*
- * From the Sandy Bridge PRM, volume 3 part 2, page 158:
+ * From the Sandy Bridge PRM, volume 1 part 1, page 114:
*
- * "The cursor surface address must be 4K byte aligned. The cursor must
- * be in linear memory, it cannot be tiled."
+ * "The dimensions of the mip maps are first determined by applying the
+ * sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
+ * if necessary, they are padded out to compression block boundaries."
*/
- if (unlikely(templ->bind & (PIPE_BIND_CURSOR | PIPE_BIND_LINEAR)))
- valid_tilings &= IMAGE_TILING_NONE;
+ w = align(w, info->block_width);
+ h = align(h, info->block_height);
/*
- * From the Sandy Bridge PRM, volume 2 part 1, page 318:
+ * From the Sandy Bridge PRM, volume 1 part 1, page 111:
*
- * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
- * Depth Buffer is not supported."
+ * "If the surface is multisampled (4x), these values must be adjusted
+ * as follows before proceeding:
*
- * "The Depth Buffer, if tiled, must use Y-Major tiling."
+ * W_L = ceiling(W_L / 2) * 4
+ * H_L = ceiling(H_L / 2) * 4"
*
- * From the Sandy Bridge PRM, volume 1 part 2, page 22:
+ * From the Ivy Bridge PRM, volume 1 part 1, page 108:
*
- * "W-Major Tile Format is used for separate stencil."
+ * "If the surface is multisampled and it is a depth or stencil surface
+ * or Multisampled Surface StorageFormat in SURFACE_STATE is
+ * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
+ * proceeding:
+ *
+ * #samples W_L = H_L =
+ * 2 ceiling(W_L / 2) * 4 HL [no adjustment]
+ * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
+ * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
+ * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
+ *
+ * For interleaved samples (4x), where pixels
+ *
+ * (x, y ) (x+1, y )
+ * (x, y+1) (x+1, y+1)
+ *
+ * would be occupied by
+ *
+ * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
+ * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
+ * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
+ * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
+ *
+ * Thus the need to
+ *
+ * w = align(w, 2) * 2;
+ * h = align(h, 2) * 2;
*/
- if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
- switch (format) {
- case PIPE_FORMAT_S8_UINT:
- valid_tilings &= IMAGE_TILING_W;
+ if (layout->interleaved_samples) {
+ switch (info->sample_count) {
+ case 1:
+ break;
+ case 2:
+ w = align(w, 2) * 2;
+ break;
+ case 4:
+ w = align(w, 2) * 2;
+ h = align(h, 2) * 2;
+ break;
+ case 8:
+ w = align(w, 2) * 4;
+ h = align(h, 2) * 2;
+ break;
+ case 16:
+ w = align(w, 2) * 4;
+ h = align(h, 2) * 4;
break;
default:
- valid_tilings &= IMAGE_TILING_Y;
+ assert(!"unsupported sample count");
break;
}
}
- if (templ->bind & PIPE_BIND_RENDER_TARGET) {
- /*
- * From the Sandy Bridge PRM, volume 1 part 2, page 32:
- *
- * "NOTE: 128BPE Format Color buffer ( render target ) MUST be
- * either TileX or Linear."
- *
- * From the Haswell PRM, volume 5, page 32:
- *
- * "NOTE: 128 BPP format color buffer (render target) supports
- * Linear, TiledX and TiledY."
- */
- if (ilo_dev_gen(params->dev) < ILO_GEN(7.5) && img->block_size == 16)
- valid_tilings &= ~IMAGE_TILING_Y;
-
- /*
- * From the Ivy Bridge PRM, volume 4 part 1, page 63:
- *
- * "This field (Surface Vertical Aligment) must be set to VALIGN_4
- * for all tiled Y Render Target surfaces."
- *
- * "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
- */
- if (ilo_dev_gen(params->dev) >= ILO_GEN(7) &&
- ilo_dev_gen(params->dev) <= ILO_GEN(7.5) &&
- img->format == PIPE_FORMAT_R32G32B32_FLOAT)
- valid_tilings &= ~IMAGE_TILING_Y;
-
- valid_tilings &= ~IMAGE_TILING_W;
- }
-
- if (templ->bind & PIPE_BIND_SAMPLER_VIEW) {
- if (ilo_dev_gen(params->dev) < ILO_GEN(8))
- valid_tilings &= ~IMAGE_TILING_W;
- }
-
- /* no conflicting binding flags */
- assert(valid_tilings);
-
- return valid_tilings;
-}
-
-static void
-img_init_size_and_format(struct ilo_image *img,
- struct ilo_image_params *params)
-{
- const struct pipe_resource *templ = params->templ;
- enum pipe_format format = templ->format;
- bool require_separate_stencil = false;
-
- img->target = templ->target;
- img->width0 = templ->width0;
- img->height0 = templ->height0;
- img->depth0 = templ->depth0;
- img->array_size = templ->array_size;
- img->level_count = templ->last_level + 1;
- img->sample_count = (templ->nr_samples) ? templ->nr_samples : 1;
-
/*
- * From the Sandy Bridge PRM, volume 2 part 1, page 317:
+ * From the Ivy Bridge PRM, volume 1 part 1, page 108:
*
- * "This field (Separate Stencil Buffer Enable) must be set to the same
- * value (enabled or disabled) as Hierarchical Depth Buffer Enable."
+ * "For separate stencil buffer, the width must be mutiplied by 2 and
+ * height divided by 2..."
*
- * GEN7+ requires separate stencil buffers.
+ * To make things easier (for transfer), we will just double the stencil
+ * stride in 3DSTATE_STENCIL_BUFFER.
*/
- if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
- if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
- require_separate_stencil = true;
- else
- require_separate_stencil = (img->aux.type == ILO_IMAGE_AUX_HIZ);
- }
-
- switch (format) {
- case PIPE_FORMAT_ETC1_RGB8:
- format = PIPE_FORMAT_R8G8B8X8_UNORM;
- break;
- case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- if (require_separate_stencil) {
- format = PIPE_FORMAT_Z24X8_UNORM;
- img->separate_stencil = true;
- }
- break;
- case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
- if (require_separate_stencil) {
- format = PIPE_FORMAT_Z32_FLOAT;
- img->separate_stencil = true;
- }
- break;
- default:
- break;
- }
+ w = align(w, layout->align_i);
+ h = align(h, layout->align_j);
- img->format = format;
- img->block_width = util_format_get_blockwidth(format);
- img->block_height = util_format_get_blockheight(format);
- img->block_size = util_format_get_blocksize(format);
-
- params->valid_tilings = img_get_valid_tilings(img, params);
- params->compressed = util_format_is_compressed(img->format);
+ *width = w;
+ *height = h;
}
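
A minimal sketch of the interleaved-sample adjustment above, assuming a
hypothetical 13x7 depth LOD with 4x IMS and align_i = align_j = 4:

    int w = 13, h = 7;

    w = align(w, 2) * 2;   /* ceiling(13 / 2) * 4 = 28 */
    h = align(h, 2) * 2;   /* ceiling(7 / 2) * 4 = 16 */

    w = align(w, 4);       /* align_i: stays 28 */
    h = align(h, 4);       /* align_j: stays 16 */

    assert(w == 28 && h == 16);
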
-static bool
-img_want_mcs(const struct ilo_image *img,
- const struct ilo_image_params *params)
+static int
+image_get_gen6_layer_count(const struct ilo_dev *dev,
+ const struct ilo_image_info *info,
+ const struct ilo_image_layout *layout)
{
- const struct pipe_resource *templ = params->templ;
- bool want_mcs = false;
+ int count = info->array_size;
- /* MCS is for RT on GEN7+ */
- if (ilo_dev_gen(params->dev) < ILO_GEN(7))
- return false;
+ ILO_DEV_ASSERT(dev, 6, 8);
- if (templ->target != PIPE_TEXTURE_2D ||
- !(templ->bind & PIPE_BIND_RENDER_TARGET))
- return false;
-
- /*
- * From the Ivy Bridge PRM, volume 4 part 1, page 77:
- *
- * "For Render Target and Sampling Engine Surfaces:If the surface is
- * multisampled (Number of Multisamples any value other than
- * MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
- *
- * "This field must be set to 0 for all SINT MSRTs when all RT channels
- * are not written"
- */
- if (templ->nr_samples > 1 && !util_format_is_pure_sint(templ->format)) {
- want_mcs = true;
- } else if (templ->nr_samples <= 1) {
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 326:
- *
- * "When MCS is buffer is used for color clear of non-multisampler
- * render target, the following restrictions apply.
- * - Support is limited to tiled render targets.
- * - Support is for non-mip-mapped and non-array surface types
- * only.
- * - Clear is supported only on the full RT; i.e., no partial clear
- * or overlapping clears.
- * - MCS buffer for non-MSRT is supported only for RT formats
- * 32bpp, 64bpp and 128bpp.
- * ..."
- */
- if (img->tiling != GEN6_TILING_NONE &&
- templ->last_level == 0 && templ->array_size == 1) {
- switch (img->block_size) {
- case 4:
- case 8:
- case 16:
- want_mcs = true;
- break;
- default:
- break;
- }
- }
- }
+ /* samples of the same index are stored in a layer */
+ if (!layout->interleaved_samples)
+ count *= info->sample_count;
- return want_mcs;
+ return count;
}
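
For example (hypothetical values), a 6-layer 2D array with 4x
non-interleaved (UMS/CMS) samples occupies 6 * 4 = 24 physical layers:

    int count = 6;   /* info->array_size */
    count *= 4;      /* info->sample_count */
    assert(count == 24);
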
-static bool
-img_want_hiz(const struct ilo_image *img,
- const struct ilo_image_params *params)
+static void
+image_get_gen6_walk_layer_heights(const struct ilo_dev *dev,
+ const struct ilo_image_info *info,
+ struct ilo_image_layout *layout)
{
- const struct pipe_resource *templ = params->templ;
- const struct util_format_description *desc =
- util_format_description(templ->format);
+ ILO_DEV_ASSERT(dev, 6, 8);
- if (ilo_debug & ILO_DEBUG_NOHIZ)
- return false;
+ layout->walk_layer_h0 = layout->lods[0].slice_height;
- /* we want 8x4 aligned levels */
- if (templ->target == PIPE_TEXTURE_1D)
- return false;
-
- if (!(templ->bind & PIPE_BIND_DEPTH_STENCIL))
- return false;
-
- if (!util_format_has_depth(desc))
- return false;
+ if (info->level_count > 1) {
+ layout->walk_layer_h1 = layout->lods[1].slice_height;
+ } else {
+ int dummy;
+ image_get_gen6_slice_size(dev, info, layout, 1,
+ &dummy, &layout->walk_layer_h1);
+ }
- /* no point in having HiZ */
- if (templ->usage == PIPE_USAGE_STAGING)
- return false;
+ if (image_get_gen6_layer_count(dev, info, layout) == 1) {
+ layout->walk_layer_height = 0;
+ return;
+ }
/*
- * As can be seen in img_calculate_hiz_size(), HiZ may not be enabled
- * for every level. This is generally fine except on GEN6, where HiZ and
- * separate stencil are enabled and disabled at the same time. When the
- * format is PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, enabling and disabling HiZ
- * can result in incompatible formats.
+ * From the Sandy Bridge PRM, volume 1 part 1, page 115:
+ *
+ * "The following equation is used for surface formats other than
+ * compressed textures:
+ *
+ * QPitch = (h0 + h1 + 11j)"
+ *
+ * "The equation for compressed textures (BC* and FXT1 surface formats)
+ * follows:
+ *
+ * QPitch = (h0 + h1 + 11j) / 4"
+ *
+ * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
+ * value calculated in the equation above, for every other odd Surface
+ * Height starting from 1 i.e. 1,5,9,13"
+ *
+ * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
+ *
+ * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
+ * buffer and stencil buffer have an implied value of ARYSPC_FULL):
+ *
+ * QPitch = (h0 + h1 + 12j)
+ * QPitch = (h0 + h1 + 12j) / 4 (compressed)
+ *
+ * (There are many typos or missing words here...)"
+ *
+ * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
+ * the base address. The PRM divides QPitch by 4 for compressed formats
+ * because the block height for those formats is 4, and it wants QPitch to
+ * mean the number of memory rows, as opposed to texel rows, between
+ * slices. Since we use texel rows everywhere, we do not need to divide
+ * QPitch by 4.
*/
- if (ilo_dev_gen(params->dev) == ILO_GEN(6) &&
- templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
- templ->last_level)
- return false;
+ layout->walk_layer_height = layout->walk_layer_h0 + layout->walk_layer_h1 +
+ ((ilo_dev_gen(dev) >= ILO_GEN(7)) ? 12 : 11) * layout->align_j;
- return true;
-}
-
-static void
-img_init_aux(struct ilo_image *img,
- const struct ilo_image_params *params)
-{
- if (img_want_hiz(img, params))
- img->aux.type = ILO_IMAGE_AUX_HIZ;
- else if (img_want_mcs(img, params))
- img->aux.type = ILO_IMAGE_AUX_MCS;
+ if (ilo_dev_gen(dev) == ILO_GEN(6) && info->sample_count > 1 &&
+ info->height % 4 == 1)
+ layout->walk_layer_height += 4;
}
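
A minimal sketch of the QPitch math above, assuming a hypothetical Gen7
ARYSPC_FULL image with h0 = 112, h1 = 56, and align_j = 4; the result is
in texel rows, so there is no division by 4 even for compressed formats:

    const int h0 = 112, h1 = 56, align_j = 4;
    const int qpitch = h0 + h1 + 12 * align_j;   /* (h0 + h1 + 12j) */

    assert(qpitch == 216);
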
static void
-img_align(struct ilo_image *img, struct ilo_image_params *params)
+image_get_gen6_monolithic_size(const struct ilo_dev *dev,
+ const struct ilo_image_info *info,
+ struct ilo_image_layout *layout,
+ int max_x, int max_y)
{
- const struct pipe_resource *templ = params->templ;
int align_w = 1, align_h = 1, pad_h = 0;
+ ILO_DEV_ASSERT(dev, 6, 8);
+
/*
* From the Sandy Bridge PRM, volume 1 part 1, page 118:
*
@@ -864,15 +807,15 @@ img_align(struct ilo_image *img, struct ilo_image_params *params)
* padding purposes. The value of 4 for j still applies for mip level
* alignment and QPitch calculation."
*/
- if (templ->bind & PIPE_BIND_SAMPLER_VIEW) {
- align_w = MAX2(align_w, img->align_i);
- align_h = MAX2(align_h, img->align_j);
+ if (info->bind_surface_sampler) {
+ align_w = MAX2(align_w, layout->align_i);
+ align_h = MAX2(align_h, layout->align_j);
- if (templ->target == PIPE_TEXTURE_CUBE)
+ if (info->type == GEN6_SURFTYPE_CUBE)
pad_h += 2;
- if (params->compressed)
- align_h = MAX2(align_h, img->align_j * 2);
+ if (info->compressed)
+ align_h = MAX2(align_h, layout->align_j * 2);
}
/*
@@ -881,149 +824,288 @@ img_align(struct ilo_image *img, struct ilo_image_params *params)
* "If the surface contains an odd number of rows of data, a final row
* below the surface must be allocated."
*/
- if (templ->bind & PIPE_BIND_RENDER_TARGET)
+ if (info->bind_surface_dp_render)
align_h = MAX2(align_h, 2);
/*
* Depth Buffer Clear/Resolve works in 8x4 sample blocks. Pad to allow HiZ
* for unaligned non-mipmapped and non-array images.
*/
- if (img->aux.type == ILO_IMAGE_AUX_HIZ &&
- templ->last_level == 0 &&
- templ->array_size == 1 &&
- templ->depth0 == 1) {
+ if (layout->aux == ILO_IMAGE_AUX_HIZ &&
+ info->level_count == 1 && info->array_size == 1 && info->depth == 1) {
align_w = MAX2(align_w, 8);
align_h = MAX2(align_h, 4);
}
- params->max_x = align(params->max_x, align_w);
- params->max_y = align(params->max_y + pad_h, align_h);
+ layout->monolithic_width = align(max_x, align_w);
+ layout->monolithic_height = align(max_y + pad_h, align_h);
}
-/* note that this may force the texture to be linear */
static void
-img_calculate_bo_size(struct ilo_image *img,
- const struct ilo_image_params *params)
+image_get_gen6_lods(const struct ilo_dev *dev,
+ const struct ilo_image_info *info,
+ struct ilo_image_layout *layout)
{
- assert(params->max_x % img->block_width == 0);
- assert(params->max_y % img->block_height == 0);
- assert(img->walk_layer_height % img->block_height == 0);
+ const int layer_count = image_get_gen6_layer_count(dev, info, layout);
+ int cur_x, cur_y, max_x, max_y;
+ uint8_t lv;
- img->bo_stride =
- (params->max_x / img->block_width) * img->block_size;
- img->bo_height = params->max_y / img->block_height;
+ ILO_DEV_ASSERT(dev, 6, 8);
- while (true) {
- unsigned w = img->bo_stride, h = img->bo_height;
- unsigned align_w, align_h;
+ cur_x = 0;
+ cur_y = 0;
+ max_x = 0;
+ max_y = 0;
+ for (lv = 0; lv < info->level_count; lv++) {
+ int slice_w, slice_h, lod_w, lod_h;
- /*
- * From the Haswell PRM, volume 5, page 163:
- *
- * "For linear surfaces, additional padding of 64 bytes is required
- * at the bottom of the surface. This is in addition to the padding
- * required above."
- */
- if (ilo_dev_gen(params->dev) >= ILO_GEN(7.5) &&
- (params->templ->bind & PIPE_BIND_SAMPLER_VIEW) &&
- img->tiling == GEN6_TILING_NONE)
- h += (64 + img->bo_stride - 1) / img->bo_stride;
+ image_get_gen6_slice_size(dev, info, layout, lv, &slice_w, &slice_h);
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 81:
- *
- * "- For linear render target surfaces, the pitch must be a
- * multiple of the element size for non-YUV surface formats.
- * Pitch must be a multiple of 2 * element size for YUV surface
- * formats.
- * - For other linear surfaces, the pitch can be any multiple of
- * bytes.
- * - For tiled surfaces, the pitch must be a multiple of the tile
- * width."
- *
- * Different requirements may exist when the bo is used in different
- * places, but our alignments here should be good enough that we do not
- * need to check params->templ->bind.
- */
- switch (img->tiling) {
- case GEN6_TILING_X:
- align_w = 512;
- align_h = 8;
+ layout->lods[lv].x = cur_x;
+ layout->lods[lv].y = cur_y;
+ layout->lods[lv].slice_width = slice_w;
+ layout->lods[lv].slice_height = slice_h;
+
+ switch (layout->walk) {
+ case ILO_IMAGE_WALK_LAYER:
+ lod_w = slice_w;
+ lod_h = slice_h;
+
+ /* MIPLAYOUT_BELOW */
+ if (lv == 1)
+ cur_x += lod_w;
+ else
+ cur_y += lod_h;
break;
- case GEN6_TILING_Y:
- align_w = 128;
- align_h = 32;
+ case ILO_IMAGE_WALK_LOD:
+ lod_w = slice_w;
+ lod_h = slice_h * layer_count;
+
+ if (lv == 1)
+ cur_x += lod_w;
+ else
+ cur_y += lod_h;
+
+ /* every LOD begins at tile boundaries */
+ if (info->level_count > 1) {
+ assert(info->format == GEN6_FORMAT_R8_UINT);
+ cur_x = align(cur_x, 64);
+ cur_y = align(cur_y, 64);
+ }
break;
- case GEN8_TILING_W:
- /*
- * From the Sandy Bridge PRM, volume 1 part 2, page 22:
- *
- * "A 4KB tile is subdivided into 8-high by 8-wide array of
- * Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
- * bytes."
- */
- align_w = 64;
- align_h = 64;
+ case ILO_IMAGE_WALK_3D:
+ {
+ const int slice_count = u_minify(info->depth, lv);
+ const int slice_count_per_row = 1 << lv;
+ const int row_count =
+ (slice_count + slice_count_per_row - 1) / slice_count_per_row;
+
+ lod_w = slice_w * slice_count_per_row;
+ lod_h = slice_h * row_count;
+ }
+
+ cur_y += lod_h;
break;
default:
- assert(img->tiling == GEN6_TILING_NONE);
- /* some good enough values */
- align_w = 64;
- align_h = 2;
+ assert(!"unknown walk type");
+ lod_w = 0;
+ lod_h = 0;
break;
}
- w = align(w, align_w);
- h = align(h, align_h);
-
- /* make sure the bo is mappable */
- if (img->tiling != GEN6_TILING_NONE) {
- /*
- * Usually only the first 256MB of the GTT is mappable.
- *
- * See also how intel_context::max_gtt_map_object_size is calculated.
- */
- const size_t mappable_gtt_size = 256 * 1024 * 1024;
-
- /*
- * Be conservative. We may be able to switch from VALIGN_4 to
- * VALIGN_2 if the image was Y-tiled, but let's keep it simple.
- */
- if (mappable_gtt_size / w / 4 < h) {
- if (params->valid_tilings & IMAGE_TILING_NONE) {
- img->tiling = GEN6_TILING_NONE;
- /* MCS support for non-MSRTs is limited to tiled RTs */
- if (img->aux.type == ILO_IMAGE_AUX_MCS &&
- params->templ->nr_samples <= 1)
- img->aux.type = ILO_IMAGE_AUX_NONE;
-
- continue;
- } else {
- ilo_warn("cannot force texture to be linear\n");
- }
- }
- }
+ if (max_x < layout->lods[lv].x + lod_w)
+ max_x = layout->lods[lv].x + lod_w;
+ if (max_y < layout->lods[lv].y + lod_h)
+ max_y = layout->lods[lv].y + lod_h;
+ }
+
+ if (layout->walk == ILO_IMAGE_WALK_LAYER) {
+ image_get_gen6_walk_layer_heights(dev, info, layout);
+ if (layer_count > 1)
+ max_y += layout->walk_layer_height * (layer_count - 1);
+ } else {
+ layout->walk_layer_h0 = 0;
+ layout->walk_layer_h1 = 0;
+ layout->walk_layer_height = 0;
+ }
+
+ image_get_gen6_monolithic_size(dev, info, layout, max_x, max_y);
+}
+
+static bool
+image_bind_gpu(const struct ilo_image_info *info)
+{
+ return (info->bind_surface_sampler ||
+ info->bind_surface_dp_render ||
+ info->bind_surface_dp_typed ||
+ info->bind_zs ||
+ info->bind_scanout ||
+ info->bind_cursor);
+}
+
+static bool
+image_validate_gen6(const struct ilo_dev *dev,
+ const struct ilo_image_info *info)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 314:
+ *
+ * "The separate stencil buffer is always enabled, thus the field in
+ * 3DSTATE_DEPTH_BUFFER to explicitly enable the separate stencil
+ * buffer has been removed Surface formats with interleaved depth and
+ * stencil are no longer supported"
+ */
+ if (ilo_dev_gen(dev) >= ILO_GEN(7) && info->bind_zs)
+ assert(!info->interleaved_stencil);
+
+ return true;
+}
+
+static bool
+image_get_gen6_layout(const struct ilo_dev *dev,
+ const struct ilo_image_info *info,
+ struct ilo_image_layout *layout)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ if (!image_validate_gen6(dev, info))
+ return false;
+
+ if (image_bind_gpu(info) || info->level_count > 1) {
+ if (!image_init_gen6_hardware_layout(dev, info, layout))
+ return false;
+ } else {
+ if (!image_init_gen6_transfer_layout(dev, info, layout))
+ return false;
+ }
+
+ /*
+ * the fact that align_i and align_j are multiples of the block width and
+ * height, respectively, is what makes the bo size a multiple of the block
+ * size, makes slices start at block boundaries, and makes many of the
+ * computations below work.
+ */
+ assert(layout->align_i % info->block_width == 0);
+ assert(layout->align_j % info->block_height == 0);
+
+ /* make sure align() works */
+ assert(util_is_power_of_two(layout->align_i) &&
+ util_is_power_of_two(layout->align_j));
+ assert(util_is_power_of_two(info->block_width) &&
+ util_is_power_of_two(info->block_height));
+
+ image_get_gen6_lods(dev, info, layout);
+
+ assert(layout->walk_layer_height % info->block_height == 0);
+ assert(layout->monolithic_width % info->block_width == 0);
+ assert(layout->monolithic_height % info->block_height == 0);
+
+ return true;
+}
+
+static bool
+image_set_gen6_bo_size(struct ilo_image *img,
+ const struct ilo_dev *dev,
+ const struct ilo_image_info *info,
+ const struct ilo_image_layout *layout)
+{
+ int stride, height;
+ int align_w, align_h;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ stride = (layout->monolithic_width / info->block_width) * info->block_size;
+ height = layout->monolithic_height / info->block_height;
+
+ /*
+ * From the Haswell PRM, volume 5, page 163:
+ *
+ * "For linear surfaces, additional padding of 64 bytes is required
+ * at the bottom of the surface. This is in addition to the padding
+ * required above."
+ */
+ if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && info->bind_surface_sampler &&
+ layout->tiling == GEN6_TILING_NONE)
+ height += (64 + stride - 1) / stride;
- img->bo_stride = w;
- img->bo_height = h;
+ /*
+ * From the Sandy Bridge PRM, volume 4 part 1, page 81:
+ *
+ * "- For linear render target surfaces, the pitch must be a multiple
+ * of the element size for non-YUV surface formats. Pitch must be a
+ * multiple of 2 * element size for YUV surface formats.
+ *
+ * - For other linear surfaces, the pitch can be any multiple of
+ * bytes.
+ * - For tiled surfaces, the pitch must be a multiple of the tile
+ * width."
+ *
+ * Different requirements may exist when the image is used in different
+ * places, but our alignments here should be good enough that we do not
+ * need to check info->bind_x.
+ */
+ switch (layout->tiling) {
+ case GEN6_TILING_X:
+ align_w = 512;
+ align_h = 8;
+ break;
+ case GEN6_TILING_Y:
+ align_w = 128;
+ align_h = 32;
+ break;
+ case GEN8_TILING_W:
+ /*
+ * From the Sandy Bridge PRM, volume 1 part 2, page 22:
+ *
+ * "A 4KB tile is subdivided into 8-high by 8-wide array of
+ * Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
+ * bytes."
+ */
+ align_w = 64;
+ align_h = 64;
+ break;
+ default:
+ assert(layout->tiling == GEN6_TILING_NONE);
+ /* some good enough values */
+ align_w = 64;
+ align_h = 2;
break;
}
+
+ if (info->force_bo_stride) {
+ if (info->force_bo_stride % align_w || info->force_bo_stride < stride)
+ return false;
+
+ img->bo_stride = info->force_bo_stride;
+ } else {
+ img->bo_stride = align(stride, align_w);
+ }
+
+ img->bo_height = align(height, align_h);
+
+ return true;
}
-static void
-img_calculate_hiz_size(struct ilo_image *img,
- const struct ilo_image_params *params)
+static bool
+image_set_gen6_hiz(struct ilo_image *img,
+ const struct ilo_dev *dev,
+ const struct ilo_image_info *info,
+ const struct ilo_image_layout *layout)
{
- const struct pipe_resource *templ = params->templ;
- const unsigned hz_align_j = 8;
+ const int hz_align_j = 8;
enum ilo_image_walk_type hz_walk;
- unsigned hz_width, hz_height, lv;
- unsigned hz_clear_w, hz_clear_h;
+ int hz_width, hz_height;
+ int hz_clear_w, hz_clear_h;
+ uint8_t lv;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
- assert(img->aux.type == ILO_IMAGE_AUX_HIZ);
+ assert(layout->aux == ILO_IMAGE_AUX_HIZ);
- assert(img->walk == ILO_IMAGE_WALK_LAYER ||
- img->walk == ILO_IMAGE_WALK_3D);
+ assert(layout->walk == ILO_IMAGE_WALK_LAYER ||
+ layout->walk == ILO_IMAGE_WALK_3D);
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 312:
@@ -1036,8 +1118,8 @@ img_calculate_hiz_size(struct ilo_image *img,
*
* We will put all LODs in a single bo with ILO_IMAGE_WALK_LOD.
*/
- if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
- hz_walk = img->walk;
+ if (ilo_dev_gen(dev) >= ILO_GEN(7))
+ hz_walk = layout->walk;
else
hz_walk = ILO_IMAGE_WALK_LOD;
@@ -1051,16 +1133,16 @@ img_calculate_hiz_size(struct ilo_image *img,
switch (hz_walk) {
case ILO_IMAGE_WALK_LAYER:
{
- const unsigned h0 = align(params->h0, hz_align_j);
- const unsigned h1 = align(params->h1, hz_align_j);
- const unsigned htail =
- ((ilo_dev_gen(params->dev) >= ILO_GEN(7)) ? 12 : 11) * hz_align_j;
- const unsigned hz_qpitch = h0 + h1 + htail;
+ const int h0 = align(layout->walk_layer_h0, hz_align_j);
+ const int h1 = align(layout->walk_layer_h1, hz_align_j);
+ const int htail =
+ ((ilo_dev_gen(dev) >= ILO_GEN(7)) ? 12 : 11) * hz_align_j;
+ const int hz_qpitch = h0 + h1 + htail;
- hz_width = align(img->lods[0].slice_width, 16);
+ hz_width = align(layout->lods[0].slice_width, 16);
- hz_height = hz_qpitch * templ->array_size / 2;
- if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
+ hz_height = hz_qpitch * info->array_size / 2;
+ if (ilo_dev_gen(dev) >= ILO_GEN(7))
hz_height = align(hz_height, 8);
img->aux.walk_layer_height = hz_qpitch;
@@ -1068,27 +1150,27 @@ img_calculate_hiz_size(struct ilo_image *img,
break;
case ILO_IMAGE_WALK_LOD:
{
- unsigned lod_tx[PIPE_MAX_TEXTURE_LEVELS];
- unsigned lod_ty[PIPE_MAX_TEXTURE_LEVELS];
- unsigned cur_tx, cur_ty;
+ int lod_tx[ILO_IMAGE_MAX_LEVEL_COUNT];
+ int lod_ty[ILO_IMAGE_MAX_LEVEL_COUNT];
+ int cur_tx, cur_ty;
/* figure out the tile offsets of LODs */
hz_width = 0;
hz_height = 0;
cur_tx = 0;
cur_ty = 0;
- for (lv = 0; lv <= templ->last_level; lv++) {
- unsigned tw, th;
+ for (lv = 0; lv < info->level_count; lv++) {
+ int tw, th;
lod_tx[lv] = cur_tx;
lod_ty[lv] = cur_ty;
- tw = align(img->lods[lv].slice_width, 16);
- th = align(img->lods[lv].slice_height, hz_align_j) *
- templ->array_size / 2;
+ tw = align(layout->lods[lv].slice_width, 16);
+ th = align(layout->lods[lv].slice_height, hz_align_j) *
+ info->array_size / 2;
/* convert to Y-tiles */
- tw = align(tw, 128) / 128;
- th = align(th, 32) / 32;
+ tw = (tw + 127) / 128;
+ th = (th + 31) / 32;
if (hz_width < cur_tx + tw)
hz_width = cur_tx + tw;
@@ -1102,22 +1184,23 @@ img_calculate_hiz_size(struct ilo_image *img,
}
/* convert tile offsets to memory offsets */
- for (lv = 0; lv <= templ->last_level; lv++) {
+ for (lv = 0; lv < info->level_count; lv++) {
img->aux.walk_lod_offsets[lv] =
(lod_ty[lv] * hz_width + lod_tx[lv]) * 4096;
}
+
hz_width *= 128;
hz_height *= 32;
}
break;
case ILO_IMAGE_WALK_3D:
- hz_width = align(img->lods[0].slice_width, 16);
+ hz_width = align(layout->lods[0].slice_width, 16);
hz_height = 0;
- for (lv = 0; lv <= templ->last_level; lv++) {
- const unsigned h = align(img->lods[lv].slice_height, hz_align_j);
+ for (lv = 0; lv < info->level_count; lv++) {
+ const int h = align(layout->lods[lv].slice_height, hz_align_j);
/* according to the formula, slices are packed together vertically */
- hz_height += h * u_minify(templ->depth0, lv);
+ hz_height += h * u_minify(info->depth, lv);
}
hz_height /= 2;
break;
@@ -1136,8 +1219,7 @@ img_calculate_hiz_size(struct ilo_image *img,
*/
hz_clear_w = 8;
hz_clear_h = 4;
- switch (templ->nr_samples) {
- case 0:
+ switch (info->sample_count) {
case 1:
default:
break;
@@ -1158,33 +1240,38 @@ img_calculate_hiz_size(struct ilo_image *img,
break;
}
- for (lv = 0; lv <= templ->last_level; lv++) {
- if (u_minify(img->width0, lv) % hz_clear_w ||
- u_minify(img->height0, lv) % hz_clear_h)
+ for (lv = 0; lv < info->level_count; lv++) {
+ if (u_minify(info->width, lv) % hz_clear_w ||
+ u_minify(info->height, lv) % hz_clear_h)
break;
img->aux.enables |= 1 << lv;
}
- /* we padded to allow this in img_align() */
- if (templ->last_level == 0 && templ->array_size == 1 && templ->depth0 == 1)
+ /* we padded to allow this in image_get_gen6_monolithic_size() */
+ if (info->level_count == 1 && info->array_size == 1 && info->depth == 1)
img->aux.enables |= 0x1;
/* align to Y-tile */
img->aux.bo_stride = align(hz_width, 128);
img->aux.bo_height = align(hz_height, 32);
+
+ return true;
}
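
A minimal sketch of the LOD-walk HiZ tile math above, assuming a
hypothetical LOD with slice_width = 300, slice_height = 150,
array_size = 2, and hz_align_j = 8:

    int tw = align(300, 16);          /* 304 */
    int th = align(150, 8) * 2 / 2;   /* 152 */

    /* convert to Y-tiles of 128 bytes x 32 rows */
    tw = (tw + 127) / 128;            /* 3 */
    th = (th + 31) / 32;              /* 5 */

    assert(tw == 3 && th == 5);
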
-static void
-img_calculate_mcs_size(struct ilo_image *img,
- const struct ilo_image_params *params)
+static bool
+image_set_gen7_mcs(struct ilo_image *img,
+ const struct ilo_dev *dev,
+ const struct ilo_image_info *info,
+ const struct ilo_image_layout *layout)
{
- const struct pipe_resource *templ = params->templ;
int mcs_width, mcs_height, mcs_cpp;
int downscale_x, downscale_y;
- assert(img->aux.type == ILO_IMAGE_AUX_MCS);
+ ILO_DEV_ASSERT(dev, 7, 8);
+
+ assert(layout->aux == ILO_IMAGE_AUX_MCS);
- if (templ->nr_samples > 1) {
+ if (info->sample_count > 1) {
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
* rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA. The
@@ -1198,7 +1285,7 @@ img_calculate_mcs_size(struct ilo_image *img,
 * RT. Similarly, we could reason that an OWord in 4X MCS maps to an 8x2
* pixel block in the RT.
*/
- switch (templ->nr_samples) {
+ switch (info->sample_count) {
case 2:
case 4:
downscale_x = 8;
@@ -1217,7 +1304,7 @@ img_calculate_mcs_size(struct ilo_image *img,
break;
default:
assert(!"unsupported sample count");
- return;
+ return false;
break;
}
@@ -1226,8 +1313,8 @@ img_calculate_mcs_size(struct ilo_image *img,
* clear rectangle cannot be masked. The scale-down clear rectangle
* thus must be aligned to 2x2, and we need to pad.
*/
- mcs_width = align(img->width0, downscale_x * 2);
- mcs_height = align(img->height0, downscale_y * 2);
+ mcs_width = align(info->width, downscale_x * 2);
+ mcs_height = align(info->height, downscale_y * 2);
} else {
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 327:
@@ -1262,18 +1349,18 @@ img_calculate_mcs_size(struct ilo_image *img,
* anything except for the size of the allocated MCS. Let's see if we
 * hit out-of-bounds access.
*/
- switch (img->tiling) {
+ switch (layout->tiling) {
case GEN6_TILING_X:
- downscale_x = 64 / img->block_size;
+ downscale_x = 64 / info->block_size;
downscale_y = 2;
break;
case GEN6_TILING_Y:
- downscale_x = 32 / img->block_size;
+ downscale_x = 32 / info->block_size;
downscale_y = 4;
break;
default:
assert(!"unsupported tiling mode");
- return;
+ return false;
break;
}
@@ -1290,181 +1377,75 @@ img_calculate_mcs_size(struct ilo_image *img,
* The scaled-down clear rectangle must be aligned to 4x4 instead of
* 2x2, and we need to pad.
*/
- mcs_width = align(img->width0, downscale_x * 4) / downscale_x;
- mcs_height = align(img->height0, downscale_y * 4) / downscale_y;
+ mcs_width = align(info->width, downscale_x * 4) / downscale_x;
+ mcs_height = align(info->height, downscale_y * 4) / downscale_y;
mcs_cpp = 16; /* an OWord */
}
- img->aux.enables = (1 << (templ->last_level + 1)) - 1;
+ img->aux.enables = (1 << info->level_count) - 1;
/* align to Y-tile */
img->aux.bo_stride = align(mcs_width * mcs_cpp, 128);
img->aux.bo_height = align(mcs_height, 32);
-}
-
-static void
-img_init(struct ilo_image *img,
- struct ilo_image_params *params)
-{
- /* there are hard dependencies between every function here */
-
- img_init_aux(img, params);
- img_init_size_and_format(img, params);
- img_init_walk(img, params);
- img_init_tiling(img, params);
- img_init_alignments(img, params);
- img_init_lods(img, params);
- img_init_layer_height(img, params);
-
- img_align(img, params);
- img_calculate_bo_size(img, params);
- img->scanout = (params->templ->bind & PIPE_BIND_SCANOUT);
-
- switch (img->aux.type) {
- case ILO_IMAGE_AUX_HIZ:
- img_calculate_hiz_size(img, params);
- break;
- case ILO_IMAGE_AUX_MCS:
- img_calculate_mcs_size(img, params);
- break;
- default:
- break;
- }
-}
-
-/**
- * The texutre is for transfer only. We can define our own layout to save
- * space.
- */
-static void
-img_init_for_transfer(struct ilo_image *img,
- const struct ilo_dev *dev,
- const struct pipe_resource *templ)
-{
- const unsigned num_layers = (templ->target == PIPE_TEXTURE_3D) ?
- templ->depth0 : templ->array_size;
- unsigned layer_width, layer_height;
-
- assert(templ->last_level == 0);
- assert(templ->nr_samples <= 1);
-
- img->aux.type = ILO_IMAGE_AUX_NONE;
-
- img->target = templ->target;
- img->width0 = templ->width0;
- img->height0 = templ->height0;
- img->depth0 = templ->depth0;
- img->array_size = templ->array_size;
- img->level_count = 1;
- img->sample_count = 1;
-
- img->format = templ->format;
- img->block_width = util_format_get_blockwidth(templ->format);
- img->block_height = util_format_get_blockheight(templ->format);
- img->block_size = util_format_get_blocksize(templ->format);
-
- img->walk = ILO_IMAGE_WALK_LOD;
-
- img->tiling = GEN6_TILING_NONE;
-
- img->align_i = img->block_width;
- img->align_j = img->block_height;
-
- assert(util_is_power_of_two(img->block_width) &&
- util_is_power_of_two(img->block_height));
-
- /* use packed layout */
- layer_width = align(templ->width0, img->align_i);
- layer_height = align(templ->height0, img->align_j);
-
- img->lods[0].slice_width = layer_width;
- img->lods[0].slice_height = layer_height;
-
- img->bo_stride = (layer_width / img->block_width) * img->block_size;
- img->bo_stride = align(img->bo_stride, 64);
-
- img->bo_height = (layer_height / img->block_height) * num_layers;
+ return true;
}
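
A minimal sketch of the non-MSRT sizing above, assuming a hypothetical
Y-tiled 1920x1080 RGBA8 render target (block_size = 4):

    const int downscale_x = 32 / 4;                   /* 8 */
    const int downscale_y = 4;
    const int mcs_width = align(1920, 8 * 4) / 8;     /* 240 */
    const int mcs_height = align(1080, 4 * 4) / 4;    /* 272 */
    const int mcs_cpp = 16;                           /* an OWord */

    assert(align(mcs_width * mcs_cpp, 128) == 3840);  /* aux.bo_stride */
    assert(align(mcs_height, 32) == 288);             /* aux.bo_height */
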
-/**
- * Initialize the image. Callers should zero-initialize \p img first.
- */
-void ilo_image_init(struct ilo_image *img,
- const struct ilo_dev *dev,
- const struct pipe_resource *templ)
+bool
+ilo_image_init(struct ilo_image *img,
+ const struct ilo_dev *dev,
+ const struct ilo_image_info *info)
{
- struct ilo_image_params params;
- bool transfer_only;
+ struct ilo_image_layout layout;
assert(ilo_is_zeroed(img, sizeof(*img)));
- /* use transfer layout when the texture is never bound to GPU */
- transfer_only = !(templ->bind & ~(PIPE_BIND_TRANSFER_WRITE |
- PIPE_BIND_TRANSFER_READ));
- if (transfer_only && templ->last_level == 0 && templ->nr_samples <= 1) {
- img_init_for_transfer(img, dev, templ);
- return;
- }
+ memset(&layout, 0, sizeof(layout));
+ layout.lods = img->lods;
- memset(&params, 0, sizeof(params));
- params.dev = dev;
- params.templ = templ;
- params.valid_tilings = IMAGE_TILING_ALL;
+ if (!image_get_gen6_layout(dev, info, &layout))
+ return false;
- img_init(img, &params);
-}
+ img->type = info->type;
-bool
-ilo_image_init_for_imported(struct ilo_image *img,
- const struct ilo_dev *dev,
- const struct pipe_resource *templ,
- enum gen_surface_tiling tiling,
- unsigned bo_stride)
-{
- struct ilo_image_params params;
+ img->format = info->format;
+ img->block_width = info->block_width;
+ img->block_height = info->block_height;
+ img->block_size = info->block_size;
- assert(ilo_is_zeroed(img, sizeof(*img)));
+ img->width0 = info->width;
+ img->height0 = info->height;
+ img->depth0 = info->depth;
+ img->array_size = info->array_size;
+ img->level_count = info->level_count;
+ img->sample_count = info->sample_count;
- if ((tiling == GEN6_TILING_X && bo_stride % 512) ||
- (tiling == GEN6_TILING_Y && bo_stride % 128) ||
- (tiling == GEN8_TILING_W && bo_stride % 64))
- return false;
+ img->walk = layout.walk;
+ img->interleaved_samples = layout.interleaved_samples;
- memset(&params, 0, sizeof(params));
- params.dev = dev;
- params.templ = templ;
- params.valid_tilings = 1 << tiling;
+ img->tiling = layout.tiling;
- img_init(img, &params);
+ img->aux.type = layout.aux;
- assert(img->tiling == tiling);
- if (img->bo_stride > bo_stride)
- return false;
-
- img->bo_stride = bo_stride;
-
- /* assume imported RTs are also scanouts */
- if (!img->scanout)
- img->scanout = (templ->bind & PIPE_BIND_RENDER_TARGET);
+ img->align_i = layout.align_i;
+ img->align_j = layout.align_j;
- return true;
-}
+ img->walk_layer_height = layout.walk_layer_height;
-bool
-ilo_image_disable_aux(struct ilo_image *img, const struct ilo_dev *dev)
-{
- /* HiZ is required for separate stencil on Gen6 */
- if (ilo_dev_gen(dev) == ILO_GEN(6) &&
- img->aux.type == ILO_IMAGE_AUX_HIZ &&
- img->separate_stencil)
+ if (!image_set_gen6_bo_size(img, dev, info, &layout))
return false;
- /* MCS is required for multisample images */
- if (img->aux.type == ILO_IMAGE_AUX_MCS &&
- img->sample_count > 1)
- return false;
+ img->scanout = info->bind_scanout;
- img->aux.enables = 0x0;
+ switch (layout.aux) {
+ case ILO_IMAGE_AUX_HIZ:
+ image_set_gen6_hiz(img, dev, info, &layout);
+ break;
+ case ILO_IMAGE_AUX_MCS:
+ image_set_gen7_mcs(img, dev, info, &layout);
+ break;
+ default:
+ break;
+ }
return true;
}
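
A hedged usage sketch of the new interface; the caller, device pointer,
and chosen format/bindings are hypothetical, and IMAGE_TILING_ALL is the
file-local mask covering all tilings:

    struct ilo_image_info info;
    struct ilo_image img;

    memset(&info, 0, sizeof(info));
    info.type = GEN6_SURFTYPE_2D;
    info.format = GEN6_FORMAT_B8G8R8A8_UNORM;
    info.block_width = 1;
    info.block_height = 1;
    info.block_size = 4;
    info.width = 1920;
    info.height = 1080;
    info.depth = 1;
    info.array_size = 1;
    info.level_count = 1;
    info.sample_count = 1;
    info.valid_tilings = IMAGE_TILING_ALL;
    info.bind_surface_sampler = true;
    info.bind_surface_dp_render = true;

    memset(&img, 0, sizeof(img));
    if (!ilo_image_init(&img, dev, &info))
       return false;   /* unsupported layout */
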
diff --git a/src/gallium/drivers/ilo/core/ilo_image.h b/src/gallium/drivers/ilo/core/ilo_image.h
index af15e856028..646ed6f5727 100644
--- a/src/gallium/drivers/ilo/core/ilo_image.h
+++ b/src/gallium/drivers/ilo/core/ilo_image.h
@@ -29,11 +29,17 @@
#define ILO_IMAGE_H
#include "genhw/genhw.h"
-#include "intel_winsys.h"
#include "ilo_core.h"
#include "ilo_dev.h"
+/*
+ * From the Ivy Bridge PRM, volume 4 part 1, page 75:
+ *
+ * "(MIP Count / LOD) representing [1,15] MIP levels"
+ */
+#define ILO_IMAGE_MAX_LEVEL_COUNT 15
+
enum ilo_image_aux_type {
ILO_IMAGE_AUX_NONE,
ILO_IMAGE_AUX_HIZ,
@@ -68,6 +74,49 @@ enum ilo_image_walk_type {
ILO_IMAGE_WALK_3D,
};
+struct ilo_image_info {
+ enum gen_surface_type type;
+
+ enum gen_surface_format format;
+ bool interleaved_stencil;
+ bool is_integer;
+ /* width, height and size of pixel blocks */
+ bool compressed;
+ unsigned block_width;
+ unsigned block_height;
+ unsigned block_size;
+
+ /* image size */
+ uint16_t width;
+ uint16_t height;
+ uint16_t depth;
+ uint16_t array_size;
+ uint8_t level_count;
+ uint8_t sample_count;
+
+ /* disable optional aux */
+ bool aux_disable;
+
+ /* tilings to consider, if any bit is set */
+ uint8_t valid_tilings;
+
+ /*
+ * prefer GEN6_TILING_NONE when the (estimated) image size exceeds the
+ * threshold
+ */
+ uint32_t prefer_linear_threshold;
+
+ /* force a stride when non-zero */
+ uint32_t force_bo_stride;
+
+ bool bind_surface_sampler;
+ bool bind_surface_dp_render;
+ bool bind_surface_dp_typed;
+ bool bind_zs;
+ bool bind_scanout;
+ bool bind_cursor;
+};
+
/*
* When the walk type is ILO_IMAGE_WALK_LAYER, there is only a slice in each
* LOD and this is used to describe LODs in the first array layer. Otherwise,
@@ -88,7 +137,10 @@ struct ilo_image_lod {
* Texture layout.
*/
struct ilo_image {
- enum pipe_texture_target target;
+ enum gen_surface_type type;
+
+ enum gen_surface_format format;
+ bool interleaved_stencil;
/* size, format, etc for programming hardware states */
unsigned width0;
@@ -97,8 +149,6 @@ struct ilo_image {
unsigned array_size;
unsigned level_count;
unsigned sample_count;
- enum pipe_format format;
- bool separate_stencil;
/*
* width, height, and size of pixel blocks for conversion between pixel
@@ -117,7 +167,7 @@ struct ilo_image {
unsigned align_i;
unsigned align_j;
- struct ilo_image_lod lods[PIPE_MAX_TEXTURE_LEVELS];
+ struct ilo_image_lod lods[ILO_IMAGE_MAX_LEVEL_COUNT];
/* physical layer height for ILO_IMAGE_WALK_LAYER */
unsigned walk_layer_height;
@@ -136,36 +186,18 @@ struct ilo_image {
unsigned enables;
/* LOD offsets for ILO_IMAGE_WALK_LOD */
- unsigned walk_lod_offsets[PIPE_MAX_TEXTURE_LEVELS];
+ unsigned walk_lod_offsets[ILO_IMAGE_MAX_LEVEL_COUNT];
unsigned walk_layer_height;
unsigned bo_stride;
unsigned bo_height;
-
- /* managed by users */
- struct intel_bo *bo;
} aux;
-
- /* managed by users */
- struct intel_bo *bo;
};
-struct pipe_resource;
-
-void
+bool
ilo_image_init(struct ilo_image *img,
const struct ilo_dev *dev,
- const struct pipe_resource *templ);
-
-bool
-ilo_image_init_for_imported(struct ilo_image *img,
- const struct ilo_dev *dev,
- const struct pipe_resource *templ,
- enum gen_surface_tiling tiling,
- unsigned bo_stride);
-
-bool
-ilo_image_disable_aux(struct ilo_image *img, const struct ilo_dev *dev);
+ const struct ilo_image_info *info);
static inline bool
ilo_image_can_enable_aux(const struct ilo_image *img, unsigned level)
diff --git a/src/gallium/drivers/ilo/core/ilo_state_sol.c b/src/gallium/drivers/ilo/core/ilo_state_sol.c
index 38c0b719ab3..6ef2c91a592 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_sol.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_sol.c
@@ -26,7 +26,7 @@
*/
#include "ilo_debug.h"
-#include "ilo_buffer.h"
+#include "ilo_vma.h"
#include "ilo_state_sol.h"
static bool
@@ -270,9 +270,6 @@ sol_buffer_validate_gen7(const struct ilo_dev *dev,
{
ILO_DEV_ASSERT(dev, 7, 8);
- if (info->buf)
- assert(info->offset < info->buf->bo_size && info->size);
-
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 208:
*
@@ -281,9 +278,17 @@ sol_buffer_validate_gen7(const struct ilo_dev *dev,
*/
assert(info->offset % 4 == 0);
+ if (info->vma) {
+ assert(info->vma->vm_alignment % 4 == 0);
+ assert(info->size && info->offset + info->size <= info->vma->vm_size);
+ }
+
/* Gen8+ only */
- if (info->write_offset_load || info->write_offset_save)
- assert(ilo_dev_gen(dev) >= ILO_GEN(8));
+ if (info->write_offset_load || info->write_offset_save) {
+ assert(ilo_dev_gen(dev) >= ILO_GEN(8) && info->write_offset_vma);
+ assert(info->write_offset_offset + sizeof(uint32_t) <=
+ info->write_offset_vma->vm_size);
+ }
/*
* From the Broadwell PRM, volume 2b, page 206:
@@ -304,25 +309,15 @@ static uint32_t
sol_buffer_get_gen6_size(const struct ilo_dev *dev,
const struct ilo_state_sol_buffer_info *info)
{
- uint32_t size;
-
ILO_DEV_ASSERT(dev, 6, 8);
- if (!info->buf)
- return 0;
-
- size = (info->offset + info->size <= info->buf->bo_size) ? info->size :
- info->buf->bo_size - info->offset;
-
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 208:
*
* "(Surface End Address) This field specifies the ending DWord
* address..."
*/
- size &= ~3;
-
- return size;
+ return (info->vma) ? info->size & ~3 : 0;
}
static bool
@@ -359,7 +354,7 @@ sol_buffer_set_gen8_3dstate_so_buffer(struct ilo_state_sol_buffer *sb,
dw1 = 0;
- if (info->buf)
+ if (info->vma)
dw1 |= GEN8_SO_BUF_DW1_ENABLE;
if (info->write_offset_load)
dw1 |= GEN8_SO_BUF_DW1_OFFSET_WRITE_ENABLE;
@@ -429,6 +424,15 @@ ilo_state_sol_init_disabled(struct ilo_state_sol *sol,
return ilo_state_sol_init(sol, dev, &info);
}
+uint32_t
+ilo_state_sol_buffer_size(const struct ilo_dev *dev, uint32_t size,
+ uint32_t *alignment)
+{
+ /* DWord aligned without padding */
+ *alignment = 4;
+ return size;
+}
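
A minimal usage sketch (the caller and the buffer size are hypothetical):

    uint32_t alignment;
    uint32_t bo_size = ilo_state_sol_buffer_size(dev, 4096, &alignment);

    /* bo_size == 4096, alignment == 4 (DWord) */
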
+
bool
ilo_state_sol_buffer_init(struct ilo_state_sol_buffer *sb,
const struct ilo_dev *dev,
@@ -443,9 +447,8 @@ ilo_state_sol_buffer_init(struct ilo_state_sol_buffer *sb,
else
ret &= sol_buffer_set_gen7_3dstate_so_buffer(sb, dev, info);
- sb->need_bo = (info->size > 0);
- sb->need_write_offset_bo = (info->write_offset_save ||
- (info->write_offset_load && !info->write_offset_imm_enable));
+ sb->vma = info->vma;
+ sb->write_offset_vma = info->write_offset_vma;
assert(ret);
diff --git a/src/gallium/drivers/ilo/core/ilo_state_sol.h b/src/gallium/drivers/ilo/core/ilo_state_sol.h
index 2513fcb4979..92c5f94725b 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_sol.h
+++ b/src/gallium/drivers/ilo/core/ilo_state_sol.h
@@ -107,17 +107,17 @@ struct ilo_state_sol {
uint8_t decl_count;
};
-struct ilo_buffer;
+struct ilo_vma;
struct ilo_state_sol_buffer_info {
- const struct ilo_buffer *buf;
+ const struct ilo_vma *vma;
uint32_t offset;
uint32_t size;
- /*
- * Gen8+ only. When enabled, require a write offset bo of at least
- * (sizeof(uint32_t) * ILO_STATE_SOL_MAX_BUFFER_COUNT) bytes
- */
+ /* Gen8+ only; at least sizeof(uint32_t) bytes */
+ const struct ilo_vma *write_offset_vma;
+ uint32_t write_offset_offset;
+
bool write_offset_load;
bool write_offset_save;
@@ -126,14 +126,10 @@ struct ilo_state_sol_buffer_info {
};
struct ilo_state_sol_buffer {
- uint32_t so_buf[4];
-
- bool need_bo;
- bool need_write_offset_bo;
+ uint32_t so_buf[5];
- /* managed by users */
- struct intel_bo *bo;
- struct intel_bo *write_offset_bo;
+ const struct ilo_vma *vma;
+ const struct ilo_vma *write_offset_vma;
};
static inline size_t
@@ -154,6 +150,10 @@ ilo_state_sol_init_disabled(struct ilo_state_sol *sol,
const struct ilo_dev *dev,
bool render_disable);
+uint32_t
+ilo_state_sol_buffer_size(const struct ilo_dev *dev, uint32_t size,
+ uint32_t *alignment);
+
bool
ilo_state_sol_buffer_init(struct ilo_state_sol_buffer *sb,
const struct ilo_dev *dev,
diff --git a/src/gallium/drivers/ilo/core/ilo_state_surface.c b/src/gallium/drivers/ilo/core/ilo_state_surface.c
index 5be9f8f6270..40fe15f316f 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_surface.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_surface.c
@@ -26,8 +26,8 @@
*/
#include "ilo_debug.h"
-#include "ilo_buffer.h"
#include "ilo_image.h"
+#include "ilo_vma.h"
#include "ilo_state_surface.h"
static bool
@@ -94,31 +94,13 @@ surface_set_gen7_null_SURFACE_STATE(struct ilo_state_surface *surf,
return true;
}
-static bool
-surface_validate_gen6_buffer(const struct ilo_dev *dev,
- const struct ilo_state_surface_buffer_info *info)
+static uint32_t
+surface_get_gen6_buffer_offset_alignment(const struct ilo_dev *dev,
+ const struct ilo_state_surface_buffer_info *info)
{
- ILO_DEV_ASSERT(dev, 6, 8);
-
- /* SVB writes are Gen6-only */
- if (ilo_dev_gen(dev) >= ILO_GEN(7))
- assert(info->access != ILO_STATE_SURFACE_ACCESS_DP_SVB);
-
- if (info->offset + info->size > info->buf->bo_size) {
- ilo_warn("invalid buffer range\n");
- return false;
- }
+ uint32_t alignment;
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 81:
- *
- * "For surfaces of type SURFTYPE_BUFFER: [0,2047] -> [1B, 2048B]
- * For surfaces of type SURFTYPE_STRBUF: [0,2047] -> [1B, 2048B]"
- */
- if (!info->struct_size || info->struct_size > 2048) {
- ilo_warn("invalid buffer struct size\n");
- return false;
- }
+ ILO_DEV_ASSERT(dev, 6, 8);
/*
* From the Ivy Bridge PRM, volume 4 part 1, page 68:
@@ -132,76 +114,153 @@ surface_validate_gen6_buffer(const struct ilo_dev *dev,
* "Certain message types used to access surfaces have more stringent
* alignment requirements. Please refer to the specific message
* documentation for additional restrictions."
- *
- * From the Ivy Bridge PRM, volume 4 part 1, page 233, 235, and 237:
- *
- * "the surface base address must be OWord aligned"
- *
- * for OWord Block Read/Write, Unaligned OWord Block Read, and OWord Dual
- * Block Read/Write.
- *
- * From the Ivy Bridge PRM, volume 4 part 1, page 246 and 249:
- *
- * "The surface base address must be DWord aligned"
- *
- * for DWord Scattered Read/Write and Byte Scattered Read/Write.
- *
- * We have to rely on users to correctly set info->struct_size here. DWord
- * Scattered Read/Write has conflicting pitch and alignment, but we do not
- * use them yet so we are fine.
- *
- * It is unclear if sampling engine surfaces require aligned offsets.
*/
- if (info->access != ILO_STATE_SURFACE_ACCESS_DP_SVB) {
- assert(info->struct_size % info->format_size == 0);
+ switch (info->access) {
+ case ILO_STATE_SURFACE_ACCESS_SAMPLER:
+ /* no alignment requirements */
+ alignment = 1;
+ break;
+ case ILO_STATE_SURFACE_ACCESS_DP_RENDER:
+ case ILO_STATE_SURFACE_ACCESS_DP_TYPED:
+ /* element-size aligned */
+ alignment = info->format_size;
- if (info->offset % info->struct_size) {
- ilo_warn("bad buffer offset\n");
- return false;
- }
- }
+ assert(info->struct_size % alignment == 0);
+ break;
+ case ILO_STATE_SURFACE_ACCESS_DP_UNTYPED:
+ /*
+ * Nothing is said about Untyped* messages, but I think they require the
+ * base address to be DWord aligned.
+ */
+ alignment = 4;
- if (info->format == GEN6_FORMAT_RAW) {
/*
- * From the Sandy Bridge PRM, volume 4 part 1, page 97:
+ * From the Ivy Bridge PRM, volume 4 part 1, page 70:
+ *
+ * "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the
+ * pitch must be a multiple of 4 bytes."
+ */
+ if (info->struct_size > 1)
+ assert(info->struct_size % alignment == 0);
+ break;
+ case ILO_STATE_SURFACE_ACCESS_DP_DATA:
+ /*
+ * From the Ivy Bridge PRM, volume 4 part 1, page 233, 235, and 237:
+ *
+ * "the surface base address must be OWord aligned"
+ *
+ * for OWord Block Read/Write, Unaligned OWord Block Read, and OWord
+ * Dual Block Read/Write.
+ *
+ * From the Ivy Bridge PRM, volume 4 part 1, page 246 and 249:
*
- * ""RAW" is supported only with buffers and structured buffers
- * accessed via the untyped surface read/write and untyped atomic
- * operation messages, which do not have a column in the table."
+ * "The surface base address must be DWord aligned"
*
- * We do not have a specific access mode for untyped messages.
+ * for DWord Scattered Read/Write and Byte Scattered Read/Write.
*/
- assert(info->access == ILO_STATE_SURFACE_ACCESS_DP_UNTYPED);
+ alignment = (info->format_size > 4) ? 16 : 4;
/*
- * Nothing is said about Untyped* messages, but I guess they require the
- * base address to be DWord aligned.
+ * From the Ivy Bridge PRM, volume 4 part 1, page 233, 235, 237, and
+ * 246:
+ *
+ * "the surface pitch is ignored, the surface is treated as a
+ * 1-dimensional surface. An element size (pitch) of 16 bytes is
+ * used to determine the size of the buffer for out-of-bounds
+ * checking if using the surface state model."
+ *
+ * for OWord Block Read/Write, Unaligned OWord Block Read, OWord
+ * Dual Block Read/Write, and DWord Scattered Read/Write.
+ *
+ * From the Ivy Bridge PRM, volume 4 part 1, page 248:
+ *
+ * "The surface pitch is ignored, the surface is treated as a
+ * 1-dimensional surface. An element size (pitch) of 4 bytes is
+ * used to determine the size of the buffer for out-of-bounds
+ * checking if using the surface state model."
+ *
+ * for Byte Scattered Read/Write.
+ *
+ * It is programmable on Gen7.5+.
*/
- if (info->offset % 4) {
- ilo_warn("bad RAW buffer offset\n");
- return false;
+ if (ilo_dev_gen(dev) < ILO_GEN(7.5)) {
+ const int fixed = (info->format_size > 1) ? 16 : 4;
+ assert(info->struct_size == fixed);
}
+ break;
+ case ILO_STATE_SURFACE_ACCESS_DP_SVB:
+ /*
+ * From the Sandy Bridge PRM, volume 4 part 1, page 259:
+ *
+ * "Both the surface base address and surface pitch must be DWord
+ * aligned."
+ */
+ alignment = 4;
- if (info->struct_size > 1) {
- /* no STRBUF on Gen6 */
- if (ilo_dev_gen(dev) == ILO_GEN(6)) {
- ilo_warn("no STRBUF support\n");
- return false;
- }
+ assert(info->struct_size % alignment == 0);
+ break;
+ default:
+ assert(!"unknown access");
+ alignment = 1;
+ break;
+ }
- /*
- * From the Ivy Bridge PRM, volume 4 part 1, page 70:
- *
- * "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the
- * pitch must be a multiple of 4 bytes."
- */
- if (info->struct_size % 4) {
- ilo_warn("bad STRBUF pitch\n");
- return false;
- }
- }
+ return alignment;
+}
+
+static bool
+surface_validate_gen6_buffer(const struct ilo_dev *dev,
+ const struct ilo_state_surface_buffer_info *info)
+{
+ uint32_t alignment;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ if (info->offset + info->size > info->vma->vm_size) {
+ ilo_warn("invalid buffer range\n");
+ return false;
}
+ /*
+ * From the Sandy Bridge PRM, volume 4 part 1, page 81:
+ *
+ * "For surfaces of type SURFTYPE_BUFFER: [0,2047] -> [1B, 2048B]
+ * For surfaces of type SURFTYPE_STRBUF: [0,2047] -> [1B, 2048B]"
+ */
+ if (!info->struct_size || info->struct_size > 2048) {
+ ilo_warn("invalid buffer struct size\n");
+ return false;
+ }
+
+ alignment = surface_get_gen6_buffer_offset_alignment(dev, info);
+ if (info->offset % alignment || info->vma->vm_alignment % alignment) {
+ ilo_warn("bad buffer offset\n");
+ return false;
+ }
+
+ /* no STRBUF on Gen6 */
+ if (info->format == GEN6_FORMAT_RAW && info->struct_size > 1)
+ assert(ilo_dev_gen(dev) >= ILO_GEN(7));
+
+ /* SVB writes are Gen6-only */
+ if (info->access == ILO_STATE_SURFACE_ACCESS_DP_SVB)
+ assert(ilo_dev_gen(dev) == ILO_GEN(6));
+
+ /*
+ * From the Ivy Bridge PRM, volume 4 part 1, page 83:
+ *
+ * "NOTE: "RAW" is supported only with buffers and structured buffers
+ * accessed via the untyped surface read/write and untyped atomic
+ * operation messages, which do not have a column in the table."
+ *
+ * From the Ivy Bridge PRM, volume 4 part 1, page 252:
+ *
+ * "For untyped messages, the Surface Format must be RAW and the
+ * Surface Type must be SURFTYPE_BUFFER or SURFTYPE_STRBUF."
+ */
+ assert((info->access == ILO_STATE_SURFACE_ACCESS_DP_UNTYPED) ==
+ (info->format == GEN6_FORMAT_RAW));
+
return true;
}
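Putting the reworked validation together, here is a minimal sketch of a caller
(hypothetical, not part of this patch; `dev` and a DWord-aligned `vma` are
assumed to already exist) binding a whole raw buffer for untyped data-port
access, mirroring what gen6_emit_launch_grid_surface_global() does later in
this series:

   struct ilo_state_surface_buffer_info info;
   struct ilo_state_surface surf;

   memset(&info, 0, sizeof(info));
   info.vma = vma;                    /* offset/size checked against vm_size */
   info.offset = 0;
   info.size = vma->vm_size;
   info.access = ILO_STATE_SURFACE_ACCESS_DP_UNTYPED;
   info.format = GEN6_FORMAT_RAW;     /* RAW iff untyped, per the assert above */
   info.format_size = 1;
   info.struct_size = 1;

   memset(&surf, 0, sizeof(surf));
   ilo_state_surface_init_for_buffer(&surf, dev, &info);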
@@ -215,8 +274,7 @@ surface_get_gen6_buffer_struct_count(const struct ilo_dev *dev,
ILO_DEV_ASSERT(dev, 6, 8);
c = info->size / info->struct_size;
- if (info->access == ILO_STATE_SURFACE_ACCESS_DP_SVB &&
- info->format_size < info->size - info->struct_size * c)
+ if (info->format_size < info->size - info->struct_size * c)
c++;
/*
@@ -367,29 +425,6 @@ surface_set_gen7_buffer_SURFACE_STATE(struct ilo_state_surface *surf,
return true;
}
-static enum gen_surface_type
-get_gen6_surface_type(const struct ilo_dev *dev, const struct ilo_image *img)
-{
- ILO_DEV_ASSERT(dev, 6, 8);
-
- switch (img->target) {
- case PIPE_TEXTURE_1D:
- case PIPE_TEXTURE_1D_ARRAY:
- return GEN6_SURFTYPE_1D;
- case PIPE_TEXTURE_2D:
- case PIPE_TEXTURE_CUBE:
- case PIPE_TEXTURE_RECT:
- case PIPE_TEXTURE_2D_ARRAY:
- case PIPE_TEXTURE_CUBE_ARRAY:
- return GEN6_SURFTYPE_2D;
- case PIPE_TEXTURE_3D:
- return GEN6_SURFTYPE_3D;
- default:
- assert(!"unknown texture target");
- return GEN6_SURFTYPE_NULL;
- }
-}
-
static bool
surface_validate_gen6_image(const struct ilo_dev *dev,
const struct ilo_state_surface_image_info *info)
@@ -408,6 +443,17 @@ surface_validate_gen6_image(const struct ilo_dev *dev,
break;
}
+ assert(info->img && info->vma);
+
+ if (info->img->tiling != GEN6_TILING_NONE)
+ assert(info->vma->vm_alignment % 4096 == 0);
+
+ if (info->aux_vma) {
+ assert(ilo_image_can_enable_aux(info->img, info->level_base));
+ /* always tiled */
+ assert(info->aux_vma->vm_alignment % 4096 == 0);
+ }
+
/*
* From the Sandy Bridge PRM, volume 4 part 1, page 78:
*
@@ -418,16 +464,18 @@ surface_validate_gen6_image(const struct ilo_dev *dev,
assert(info->img->bo_stride && info->img->bo_stride <= 512 * 1024 &&
info->img->width0 <= info->img->bo_stride);
- if (info->is_cube_map) {
- assert(get_gen6_surface_type(dev, info->img) == GEN6_SURFTYPE_2D);
+ if (info->type != info->img->type) {
+ assert(info->type == GEN6_SURFTYPE_2D &&
+ info->img->type == GEN6_SURFTYPE_CUBE);
+ }
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 78:
- *
- * "For cube maps, Width must be set equal to the Height."
- */
+ /*
+ * From the Sandy Bridge PRM, volume 4 part 1, page 78:
+ *
+ * "For cube maps, Width must be set equal to the Height."
+ */
+ if (info->type == GEN6_SURFTYPE_CUBE)
assert(info->img->width0 == info->img->height0);
- }
/*
* From the Sandy Bridge PRM, volume 4 part 1, page 72:
@@ -463,20 +511,21 @@ surface_validate_gen6_image(const struct ilo_dev *dev,
}
static void
-get_gen6_max_extent(const struct ilo_dev *dev,
- const struct ilo_image *img,
- uint16_t *max_w, uint16_t *max_h)
+surface_get_gen6_image_max_extent(const struct ilo_dev *dev,
+ const struct ilo_state_surface_image_info *info,
+ uint16_t *max_w, uint16_t *max_h)
{
const uint16_t max_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 16384 : 8192;
ILO_DEV_ASSERT(dev, 6, 8);
- switch (get_gen6_surface_type(dev, img)) {
+ switch (info->type) {
case GEN6_SURFTYPE_1D:
*max_w = max_size;
*max_h = 1;
break;
case GEN6_SURFTYPE_2D:
+ case GEN6_SURFTYPE_CUBE:
*max_w = max_size;
*max_h = max_size;
break;
@@ -504,7 +553,7 @@ surface_get_gen6_image_extent(const struct ilo_dev *dev,
w = info->img->width0;
h = info->img->height0;
- get_gen6_max_extent(dev, info->img, &max_w, &max_h);
+ surface_get_gen6_image_max_extent(dev, info, &max_w, &max_h);
assert(w && h && w <= max_w && h <= max_h);
*width = w - 1;
@@ -555,16 +604,17 @@ surface_get_gen6_image_slices(const struct ilo_dev *dev,
* layers to (86 * 6), about 512.
*/
- switch (get_gen6_surface_type(dev, info->img)) {
+ switch (info->type) {
case GEN6_SURFTYPE_1D:
case GEN6_SURFTYPE_2D:
+ case GEN6_SURFTYPE_CUBE:
max_slice = (ilo_dev_gen(dev) >= ILO_GEN(7.5)) ? 2048 : 512;
assert(info->img->array_size <= max_slice);
max_slice = info->img->array_size;
d = info->slice_count;
- if (info->is_cube_map) {
+ if (info->type == GEN6_SURFTYPE_CUBE) {
if (info->access == ILO_STATE_SURFACE_ACCESS_SAMPLER) {
if (!d || d % 6) {
ilo_warn("invalid cube slice count\n");
@@ -877,7 +927,6 @@ surface_set_gen6_image_SURFACE_STATE(struct ilo_state_surface *surf,
uint8_t min_lod, mip_count;
enum gen_sample_count sample_count;
uint32_t alignments;
- enum gen_surface_type type;
uint32_t dw0, dw2, dw3, dw4, dw5;
ILO_DEV_ASSERT(dev, 6, 6);
@@ -897,10 +946,7 @@ surface_set_gen6_image_SURFACE_STATE(struct ilo_state_surface *surf,
if (info->img->sample_count > 1)
assert(info->img->interleaved_samples);
- type = (info->is_cube_map) ? GEN6_SURFTYPE_CUBE :
- get_gen6_surface_type(dev, info->img);
-
- dw0 = type << GEN6_SURFACE_DW0_TYPE__SHIFT |
+ dw0 = info->type << GEN6_SURFACE_DW0_TYPE__SHIFT |
info->format << GEN6_SURFACE_DW0_FORMAT__SHIFT |
GEN6_SURFACE_DW0_MIPLAYOUT_BELOW;
@@ -927,7 +973,7 @@ surface_set_gen6_image_SURFACE_STATE(struct ilo_state_surface *surf,
* "When TEXCOORDMODE_CLAMP is used when accessing a cube map, this
* field must be programmed to 111111b (all faces enabled)."
*/
- if (info->is_cube_map &&
+ if (info->type == GEN6_SURFTYPE_CUBE &&
info->access == ILO_STATE_SURFACE_ACCESS_SAMPLER) {
dw0 |= GEN6_SURFACE_DW0_CUBE_MAP_CORNER_MODE_AVERAGE |
GEN6_SURFACE_DW0_CUBE_FACE_ENABLES__MASK;
@@ -956,7 +1002,7 @@ surface_set_gen6_image_SURFACE_STATE(struct ilo_state_surface *surf,
surf->surface[4] = dw4;
surf->surface[5] = dw5;
- surf->type = type;
+ surf->type = info->type;
surf->min_lod = min_lod;
surf->mip_count = mip_count;
@@ -972,7 +1018,6 @@ surface_set_gen7_image_SURFACE_STATE(struct ilo_state_surface *surf,
uint8_t min_lod, mip_count;
uint32_t alignments;
enum gen_sample_count sample_count;
- enum gen_surface_type type;
uint32_t dw0, dw1, dw2, dw3, dw4, dw5, dw7;
ILO_DEV_ASSERT(dev, 7, 8);
@@ -986,10 +1031,7 @@ surface_set_gen7_image_SURFACE_STATE(struct ilo_state_surface *surf,
!surface_get_gen6_image_alignments(dev, info, &alignments))
return false;
- type = (info->is_cube_map) ? GEN6_SURFTYPE_CUBE :
- get_gen6_surface_type(dev, info->img);
-
- dw0 = type << GEN7_SURFACE_DW0_TYPE__SHIFT |
+ dw0 = info->type << GEN7_SURFACE_DW0_TYPE__SHIFT |
info->format << GEN7_SURFACE_DW0_FORMAT__SHIFT |
alignments;
@@ -1023,7 +1065,7 @@ surface_set_gen7_image_SURFACE_STATE(struct ilo_state_surface *surf,
* field must be programmed to 111111b (all faces enabled). This field
* is ignored unless the Surface Type is SURFTYPE_CUBE."
*/
- if (info->is_cube_map &&
+ if (info->type == GEN6_SURFTYPE_CUBE &&
info->access == ILO_STATE_SURFACE_ACCESS_SAMPLER)
dw0 |= GEN7_SURFACE_DW0_CUBE_FACE_ENABLES__MASK;
@@ -1087,13 +1129,61 @@ surface_set_gen7_image_SURFACE_STATE(struct ilo_state_surface *surf,
surf->surface[12] = 0;
}
- surf->type = type;
+ surf->type = info->type;
surf->min_lod = min_lod;
surf->mip_count = mip_count;
return true;
}
+uint32_t
+ilo_state_surface_buffer_size(const struct ilo_dev *dev,
+ enum ilo_state_surface_access access,
+ uint32_t size, uint32_t *alignment)
+{
+ switch (access) {
+ case ILO_STATE_SURFACE_ACCESS_SAMPLER:
+ /*
+ * From the Sandy Bridge PRM, volume 1 part 1, page 118:
+ *
+ * "For buffers, which have no inherent "height," padding
+ * requirements are different. A buffer must be padded to the next
+ * multiple of 256 array elements, with an additional 16 bytes
+ * added beyond that to account for the L1 cache line."
+ *
+ * Assuming tightly packed GEN6_FORMAT_R32G32B32A32_FLOAT, the size
+ * needs to be padded to 4096 (= 16 * 256).
+ */
+ *alignment = 1;
+ size = align(size, 4096) + 16;
+ break;
+ case ILO_STATE_SURFACE_ACCESS_DP_RENDER:
+ case ILO_STATE_SURFACE_ACCESS_DP_TYPED:
+ /* element-size aligned; 16 covers the worst case */
+ *alignment = 16;
+ break;
+ case ILO_STATE_SURFACE_ACCESS_DP_UNTYPED:
+ /* DWord aligned? */
+ *alignment = 4;
+ break;
+ case ILO_STATE_SURFACE_ACCESS_DP_DATA:
+ /* OWord aligned */
+ *alignment = 16;
+ size = align(size, 16);
+ break;
+ case ILO_STATE_SURFACE_ACCESS_DP_SVB:
+ /* always DWord aligned */
+ *alignment = 4;
+ break;
+ default:
+ assert(!"unknown access");
+ *alignment = 1;
+ break;
+ }
+
+ return size;
+}
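A hedged usage sketch of the new sizing helper: callers pass the byte count
they need and get back the padded allocation size plus the alignment the vma
must honor (`dev` is assumed). For a 100-byte buffer that may be sampled:

   uint32_t alignment;
   uint32_t size;

   /* align(100, 4096) + 16 = 4112 bytes; alignment comes back as 1 */
   size = ilo_state_surface_buffer_size(dev, ILO_STATE_SURFACE_ACCESS_SAMPLER,
                                        100, &alignment);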
+
bool
ilo_state_surface_init_for_null(struct ilo_state_surface *surf,
const struct ilo_dev *dev)
@@ -1107,6 +1197,7 @@ ilo_state_surface_init_for_null(struct ilo_state_surface *surf,
else
ret &= surface_set_gen6_null_SURFACE_STATE(surf, dev);
+ surf->vma = NULL;
surf->type = GEN6_SURFTYPE_NULL;
surf->readonly = true;
@@ -1129,6 +1220,7 @@ ilo_state_surface_init_for_buffer(struct ilo_state_surface *surf,
else
ret &= surface_set_gen6_buffer_SURFACE_STATE(surf, dev, info);
+ surf->vma = info->vma;
surf->readonly = info->readonly;
assert(ret);
@@ -1150,6 +1242,9 @@ ilo_state_surface_init_for_image(struct ilo_state_surface *surf,
else
ret &= surface_set_gen6_image_SURFACE_STATE(surf, dev, info);
+ surf->vma = info->vma;
+ surf->aux_vma = info->aux_vma;
+
surf->is_integer = info->is_integer;
surf->readonly = info->readonly;
surf->scanout = info->img->scanout;
diff --git a/src/gallium/drivers/ilo/core/ilo_state_surface.h b/src/gallium/drivers/ilo/core/ilo_state_surface.h
index 9c025428d50..e78c7c97db1 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_surface.h
+++ b/src/gallium/drivers/ilo/core/ilo_state_surface.h
@@ -29,14 +29,10 @@
#define ILO_STATE_SURFACE_H
#include "genhw/genhw.h"
-#include "intel_winsys.h"
#include "ilo_core.h"
#include "ilo_dev.h"
-struct ilo_buffer;
-struct ilo_image;
-
enum ilo_state_surface_access {
ILO_STATE_SURFACE_ACCESS_SAMPLER, /* sampling engine surfaces */
ILO_STATE_SURFACE_ACCESS_DP_RENDER, /* render target surfaces */
@@ -46,42 +42,51 @@ enum ilo_state_surface_access {
ILO_STATE_SURFACE_ACCESS_DP_SVB,
};
+struct ilo_vma;
+struct ilo_image;
+
struct ilo_state_surface_buffer_info {
- const struct ilo_buffer *buf;
+ const struct ilo_vma *vma;
+ uint32_t offset;
+ uint32_t size;
enum ilo_state_surface_access access;
+ /* format_size may be less than, equal to, or greater than struct_size */
enum gen_surface_format format;
uint8_t format_size;
bool readonly;
uint16_t struct_size;
-
- uint32_t offset;
- uint32_t size;
};
struct ilo_state_surface_image_info {
const struct ilo_image *img;
+ uint8_t level_base;
+ uint8_t level_count;
+ uint16_t slice_base;
+ uint16_t slice_count;
+
+ const struct ilo_vma *vma;
+ const struct ilo_vma *aux_vma;
enum ilo_state_surface_access access;
+ enum gen_surface_type type;
+
enum gen_surface_format format;
bool is_integer;
bool readonly;
- bool is_cube_map;
bool is_array;
-
- uint8_t level_base;
- uint8_t level_count;
- uint16_t slice_base;
- uint16_t slice_count;
};
struct ilo_state_surface {
uint32_t surface[13];
+ const struct ilo_vma *vma;
+ const struct ilo_vma *aux_vma;
+
enum gen_surface_type type;
uint8_t min_lod;
uint8_t mip_count;
@@ -89,9 +94,6 @@ struct ilo_state_surface {
bool readonly;
bool scanout;
-
- /* managed by users */
- struct intel_bo *bo;
};
bool
@@ -99,6 +101,11 @@ ilo_state_surface_valid_format(const struct ilo_dev *dev,
enum ilo_state_surface_access access,
enum gen_surface_format format);
+uint32_t
+ilo_state_surface_buffer_size(const struct ilo_dev *dev,
+ enum ilo_state_surface_access access,
+ uint32_t size, uint32_t *alignment);
+
bool
ilo_state_surface_init_for_null(struct ilo_state_surface *surf,
const struct ilo_dev *dev);
diff --git a/src/gallium/drivers/ilo/core/ilo_state_vf.c b/src/gallium/drivers/ilo/core/ilo_state_vf.c
index ddc75428ed7..9faf835fef2 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_vf.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_vf.c
@@ -26,7 +26,7 @@
*/
#include "ilo_debug.h"
-#include "ilo_buffer.h"
+#include "ilo_vma.h"
#include "ilo_state_vf.h"
static bool
@@ -479,8 +479,8 @@ vertex_buffer_validate_gen6(const struct ilo_dev *dev,
{
ILO_DEV_ASSERT(dev, 6, 8);
- if (info->buf)
- assert(info->offset < info->buf->bo_size && info->size);
+ if (info->vma)
+ assert(info->size && info->offset + info->size <= info->vma->vm_size);
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 86:
@@ -500,6 +500,9 @@ vertex_buffer_validate_gen6(const struct ilo_dev *dev,
* aligned address, and BufferPitch must be a multiple of 64-bits."
*/
if (info->cv_has_double) {
+ if (info->vma)
+ assert(info->vma->vm_alignment % 8 == 0);
+
assert(info->stride % 8 == 0);
assert((info->offset + info->cv_double_vertex_offset_mod_8) % 8 == 0);
}
@@ -512,12 +515,7 @@ vertex_buffer_get_gen6_size(const struct ilo_dev *dev,
const struct ilo_state_vertex_buffer_info *info)
{
ILO_DEV_ASSERT(dev, 6, 8);
-
- if (!info->buf)
- return 0;
-
- return (info->offset + info->size <= info->buf->bo_size) ? info->size :
- info->buf->bo_size - info->offset;
+ return (info->vma) ? info->size : 0;
}
static bool
@@ -537,7 +535,7 @@ vertex_buffer_set_gen8_vertex_buffer_state(struct ilo_state_vertex_buffer *vb,
if (ilo_dev_gen(dev) >= ILO_GEN(7))
dw0 |= GEN7_VB_DW0_ADDR_MODIFIED;
- if (!info->buf)
+ if (!info->vma)
dw0 |= GEN6_VB_DW0_IS_NULL;
STATIC_ASSERT(ARRAY_SIZE(vb->vb) >= 3);
@@ -551,7 +549,7 @@ vertex_buffer_set_gen8_vertex_buffer_state(struct ilo_state_vertex_buffer *vb,
vb->vb[2] = (size) ? info->offset + size - 1 : 0;
}
- vb->need_bo = (info->buf != NULL);
+ vb->vma = info->vma;
return true;
}
@@ -586,8 +584,10 @@ index_buffer_validate_gen6(const struct ilo_dev *dev,
*/
assert(info->offset % format_size == 0);
- if (info->buf)
- assert(info->offset < info->buf->bo_size && info->size);
+ if (info->vma) {
+ assert(info->vma->vm_alignment % format_size == 0);
+ assert(info->size && info->offset + info->size <= info->vma->vm_size);
+ }
return true;
}
@@ -600,12 +600,10 @@ index_buffer_get_gen6_size(const struct ilo_dev *dev,
ILO_DEV_ASSERT(dev, 6, 8);
- if (!info->buf)
+ if (!info->vma)
return 0;
- size = (info->offset + info->size <= info->buf->bo_size) ? info->size :
- info->buf->bo_size - info->offset;
-
+ size = info->size;
if (ilo_dev_gen(dev) < ILO_GEN(8)) {
const uint32_t format_size = get_index_format_size(info->format);
size -= (size % format_size);
@@ -638,7 +636,7 @@ index_buffer_set_gen8_3DSTATE_INDEX_BUFFER(struct ilo_state_index_buffer *ib,
ib->ib[2] = (size) ? info->offset + size - 1 : 0;
}
- ib->need_bo = (info->buf != NULL);
+ ib->vma = info->vma;
return true;
}
@@ -949,6 +947,15 @@ ilo_state_vf_get_delta(const struct ilo_state_vf *vf,
}
}
+uint32_t
+ilo_state_vertex_buffer_size(const struct ilo_dev *dev, uint32_t size,
+ uint32_t *alignment)
+{
+ /* align for doubles without padding */
+ *alignment = 8;
+ return size;
+}
+
/**
* No need to initialize first.
*/
@@ -966,6 +973,15 @@ ilo_state_vertex_buffer_set_info(struct ilo_state_vertex_buffer *vb,
return ret;
}
+uint32_t
+ilo_state_index_buffer_size(const struct ilo_dev *dev, uint32_t size,
+ uint32_t *alignment)
+{
+ /* align for the worst case without padding */
+ *alignment = get_index_format_size(GEN6_INDEX_DWORD);
+ return size;
+}
+
/**
* No need to initialize first.
*/
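Like ilo_state_surface_buffer_size(), these helpers let a buffer that may be
bound for several purposes take the maximum of each requirement. A hedged
sketch (`dev` and `templ` assumed; MAX2 is the usual Mesa macro):

   uint32_t size = templ->width0;   /* buffer size requested by the state tracker */
   uint32_t align_vb, align_ib, alignment;

   size = MAX2(size, ilo_state_vertex_buffer_size(dev, size, &align_vb));
   size = MAX2(size, ilo_state_index_buffer_size(dev, size, &align_ib));
   alignment = MAX2(align_vb, align_ib);   /* 8 vs 4 -> 8 */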
diff --git a/src/gallium/drivers/ilo/core/ilo_state_vf.h b/src/gallium/drivers/ilo/core/ilo_state_vf.h
index f15c63a248a..16b128bf63c 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_vf.h
+++ b/src/gallium/drivers/ilo/core/ilo_state_vf.h
@@ -126,10 +126,10 @@ struct ilo_state_vf_delta {
uint32_t dirty;
};
-struct ilo_buffer;
+struct ilo_vma;
struct ilo_state_vertex_buffer_info {
- const struct ilo_buffer *buf;
+ const struct ilo_vma *vma;
uint32_t offset;
uint32_t size;
@@ -143,14 +143,11 @@ struct ilo_state_vertex_buffer_info {
struct ilo_state_vertex_buffer {
uint32_t vb[3];
- bool need_bo;
-
- /* managed by users */
- struct intel_bo *bo;
+ const struct ilo_vma *vma;
};
struct ilo_state_index_buffer_info {
- const struct ilo_buffer *buf;
+ const struct ilo_vma *vma;
uint32_t offset;
uint32_t size;
@@ -160,10 +157,7 @@ struct ilo_state_index_buffer_info {
struct ilo_state_index_buffer {
uint32_t ib[3];
- bool need_bo;
-
- /* managed by users */
- struct intel_bo *bo;
+ const struct ilo_vma *vma;
};
static inline size_t
@@ -215,11 +209,19 @@ ilo_state_vf_get_delta(const struct ilo_state_vf *vf,
const struct ilo_state_vf *old,
struct ilo_state_vf_delta *delta);
+uint32_t
+ilo_state_vertex_buffer_size(const struct ilo_dev *dev, uint32_t size,
+ uint32_t *alignment);
+
bool
ilo_state_vertex_buffer_set_info(struct ilo_state_vertex_buffer *vb,
const struct ilo_dev *dev,
const struct ilo_state_vertex_buffer_info *info);
+uint32_t
+ilo_state_index_buffer_size(const struct ilo_dev *dev, uint32_t size,
+ uint32_t *alignment);
+
bool
ilo_state_index_buffer_set_info(struct ilo_state_index_buffer *ib,
const struct ilo_dev *dev,
diff --git a/src/gallium/drivers/ilo/core/ilo_state_zs.c b/src/gallium/drivers/ilo/core/ilo_state_zs.c
index 901fedb5599..827632764b2 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_zs.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_zs.c
@@ -25,10 +25,9 @@
* Chia-I Wu <[email protected]>
*/
-#include "intel_winsys.h"
-
#include "ilo_debug.h"
#include "ilo_image.h"
+#include "ilo_vma.h"
#include "ilo_state_zs.h"
static bool
@@ -56,70 +55,9 @@ zs_set_gen6_null_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
zs->depth[3] = 0;
zs->depth[4] = 0;
- zs->depth_format = format;
-
return true;
}
-static enum gen_surface_type
-get_gen6_surface_type(const struct ilo_dev *dev, const struct ilo_image *img)
-{
- ILO_DEV_ASSERT(dev, 6, 8);
-
- switch (img->target) {
- case PIPE_TEXTURE_1D:
- case PIPE_TEXTURE_1D_ARRAY:
- return GEN6_SURFTYPE_1D;
- case PIPE_TEXTURE_2D:
- case PIPE_TEXTURE_CUBE:
- case PIPE_TEXTURE_RECT:
- case PIPE_TEXTURE_2D_ARRAY:
- case PIPE_TEXTURE_CUBE_ARRAY:
- return GEN6_SURFTYPE_2D;
- case PIPE_TEXTURE_3D:
- return GEN6_SURFTYPE_3D;
- default:
- assert(!"unknown texture target");
- return GEN6_SURFTYPE_NULL;
- }
-}
-
-static enum gen_depth_format
-get_gen6_depth_format(const struct ilo_dev *dev, const struct ilo_image *img)
-{
- ILO_DEV_ASSERT(dev, 6, 8);
-
- if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
- switch (img->format) {
- case PIPE_FORMAT_Z32_FLOAT:
- return GEN6_ZFORMAT_D32_FLOAT;
- case PIPE_FORMAT_Z24X8_UNORM:
- return GEN6_ZFORMAT_D24_UNORM_X8_UINT;
- case PIPE_FORMAT_Z16_UNORM:
- return GEN6_ZFORMAT_D16_UNORM;
- default:
- assert(!"unknown depth format");
- return GEN6_ZFORMAT_D32_FLOAT;
- }
- } else {
- switch (img->format) {
- case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
- return GEN6_ZFORMAT_D32_FLOAT_S8X24_UINT;
- case PIPE_FORMAT_Z32_FLOAT:
- return GEN6_ZFORMAT_D32_FLOAT;
- case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- return GEN6_ZFORMAT_D24_UNORM_S8_UINT;
- case PIPE_FORMAT_Z24X8_UNORM:
- return GEN6_ZFORMAT_D24_UNORM_X8_UINT;
- case PIPE_FORMAT_Z16_UNORM:
- return GEN6_ZFORMAT_D16_UNORM;
- default:
- assert(!"unknown depth format");
- return GEN6_ZFORMAT_D32_FLOAT;
- }
- }
-}
-
static bool
zs_validate_gen6(const struct ilo_dev *dev,
const struct ilo_state_zs_info *info)
@@ -128,63 +66,102 @@ zs_validate_gen6(const struct ilo_dev *dev,
ILO_DEV_ASSERT(dev, 6, 8);
+ assert(!info->z_img == !info->z_vma);
+ assert(!info->s_img == !info->s_vma);
+
+ /* all tiled */
+ if (info->z_img) {
+ assert(info->z_img->tiling == GEN6_TILING_Y);
+ assert(info->z_vma->vm_alignment % 4096 == 0);
+ }
+ if (info->s_img) {
+ assert(info->s_img->tiling == GEN8_TILING_W);
+ assert(info->s_vma->vm_alignment % 4096 == 0);
+ }
+ if (info->hiz_vma) {
+ assert(info->z_img &&
+ ilo_image_can_enable_aux(info->z_img, info->level));
+ assert(info->hiz_vma->vm_alignment % 4096 == 0);
+ }
+
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 315:
*
- * The stencil buffer has a format of S8_UINT, and shares Surface
+ * "The stencil buffer has a format of S8_UINT, and shares Surface
* Type, Height, Width, and Depth, Minimum Array Element, Render
* Target View Extent, Depth Coordinate Offset X/Y, LOD, and Depth
- * Buffer Object Control State fields of the depth buffer.
+ * Buffer Object Control State fields of the depth buffer."
*/
- if (info->z_img == info->s_img) {
- assert(info->z_img->target == info->s_img->target &&
- info->z_img->width0 == info->s_img->width0 &&
+ if (info->z_img && info->s_img && info->z_img != info->s_img) {
+ assert(info->z_img->type == info->s_img->type &&
info->z_img->height0 == info->s_img->height0 &&
info->z_img->depth0 == info->s_img->depth0);
}
- assert(info->level < img->level_count);
- assert(img->bo_stride);
-
- if (info->hiz_enable) {
- assert(info->z_img &&
- ilo_image_can_enable_aux(info->z_img, info->level));
+ if (info->type != img->type) {
+ assert(info->type == GEN6_SURFTYPE_2D &&
+ img->type == GEN6_SURFTYPE_CUBE);
}
- if (info->is_cube_map) {
- assert(get_gen6_surface_type(dev, img) == GEN6_SURFTYPE_2D);
-
+ if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+ switch (info->format) {
+ case GEN6_ZFORMAT_D32_FLOAT:
+ case GEN6_ZFORMAT_D24_UNORM_X8_UINT:
+ case GEN6_ZFORMAT_D16_UNORM:
+ break;
+ default:
+ assert(!"unknown depth format");
+ break;
+ }
+ } else {
/*
- * From the Sandy Bridge PRM, volume 2 part 1, page 323:
+ * From the Ironlake PRM, volume 2 part 1, page 330:
+ *
+ * "If this field (Separate Stencil Buffer Enable) is disabled, the
+ * Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
+ *
+ * From the Sandy Bridge PRM, volume 2 part 1, page 321:
*
- * "For cube maps, Width must be set equal to Height."
+ * "[DevSNB]: This field (Separate Stencil Buffer Enable) must be
+ * set to the same value (enabled or disabled) as Hierarchical
+ * Depth Buffer Enable."
*/
- assert(img->width0 == img->height0);
+ if (info->hiz_vma)
+ assert(info->format != GEN6_ZFORMAT_D24_UNORM_S8_UINT);
+ else
+ assert(info->format != GEN6_ZFORMAT_D24_UNORM_X8_UINT);
}
- if (info->z_img)
- assert(info->z_img->tiling == GEN6_TILING_Y);
- if (info->s_img)
- assert(info->s_img->tiling == GEN8_TILING_W);
+ assert(info->level < img->level_count);
+ assert(img->bo_stride);
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 323:
+ *
+ * "For cube maps, Width must be set equal to Height."
+ */
+ if (info->type == GEN6_SURFTYPE_CUBE)
+ assert(img->width0 == img->height0);
return true;
}
static void
-get_gen6_max_extent(const struct ilo_dev *dev,
- const struct ilo_image *img,
- uint16_t *max_w, uint16_t *max_h)
+zs_get_gen6_max_extent(const struct ilo_dev *dev,
+ const struct ilo_state_zs_info *info,
+ uint16_t *max_w, uint16_t *max_h)
{
const uint16_t max_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 16384 : 8192;
ILO_DEV_ASSERT(dev, 6, 8);
- switch (get_gen6_surface_type(dev, img)) {
+ switch (info->type) {
case GEN6_SURFTYPE_1D:
*max_w = max_size;
*max_h = 1;
break;
case GEN6_SURFTYPE_2D:
+ case GEN6_SURFTYPE_CUBE:
*max_w = max_size;
*max_h = max_size;
break;
@@ -274,7 +251,7 @@ zs_get_gen6_depth_extent(const struct ilo_dev *dev,
w = img->width0;
h = img->height0;
- if (info->hiz_enable) {
+ if (info->hiz_vma) {
uint16_t align_w, align_h;
get_gen6_hiz_alignments(dev, info->z_img, &align_w, &align_h);
@@ -290,7 +267,7 @@ zs_get_gen6_depth_extent(const struct ilo_dev *dev,
h = align(h, align_h);
}
- get_gen6_max_extent(dev, img, &max_w, &max_h);
+ zs_get_gen6_max_extent(dev, info, &max_w, &max_h);
assert(w && h && w <= max_w && h <= max_h);
*width = w - 1;
@@ -319,16 +296,17 @@ zs_get_gen6_depth_slices(const struct ilo_dev *dev,
* surfaces. If the volume texture is MIP-mapped, this field specifies
* the depth of the base MIP level."
*/
- switch (get_gen6_surface_type(dev, img)) {
+ switch (info->type) {
case GEN6_SURFTYPE_1D:
case GEN6_SURFTYPE_2D:
+ case GEN6_SURFTYPE_CUBE:
max_slice = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 2048 : 512;
assert(img->array_size <= max_slice);
max_slice = img->array_size;
d = info->slice_count;
- if (info->is_cube_map) {
+ if (info->type == GEN6_SURFTYPE_CUBE) {
/*
* Minimum Array Element and Depth must be 0; Render Target View
* Extent is ignored.
@@ -408,8 +386,6 @@ zs_set_gen6_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
const struct ilo_state_zs_info *info)
{
uint16_t width, height, depth, array_base, view_extent;
- enum gen_surface_type type;
- enum gen_depth_format format;
uint32_t dw1, dw2, dw3, dw4;
ILO_DEV_ASSERT(dev, 6, 6);
@@ -420,37 +396,15 @@ zs_set_gen6_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
&view_extent))
return false;
- type = (info->is_cube_map) ? GEN6_SURFTYPE_CUBE :
- (info->z_img) ? get_gen6_surface_type(dev, info->z_img) :
- get_gen6_surface_type(dev, info->s_img);
-
- format = (info->z_img) ? get_gen6_depth_format(dev, info->z_img) :
- GEN6_ZFORMAT_D32_FLOAT;
-
- /*
- * From the Ironlake PRM, volume 2 part 1, page 330:
- *
- * "If this field (Separate Stencil Buffer Enable) is disabled, the
- * Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
- *
- * From the Sandy Bridge PRM, volume 2 part 1, page 321:
- *
- * "[DevSNB]: This field (Separate Stencil Buffer Enable) must be set
- * to the same value (enabled or disabled) as Hierarchical Depth
- * Buffer Enable."
- */
- if (!info->hiz_enable && format == GEN6_ZFORMAT_D24_UNORM_X8_UINT)
- format = GEN6_ZFORMAT_D24_UNORM_S8_UINT;
-
/* info->z_readonly and info->s_readonly are ignored on Gen6 */
- dw1 = type << GEN6_DEPTH_DW1_TYPE__SHIFT |
+ dw1 = info->type << GEN6_DEPTH_DW1_TYPE__SHIFT |
GEN6_TILING_Y << GEN6_DEPTH_DW1_TILING__SHIFT |
- format << GEN6_DEPTH_DW1_FORMAT__SHIFT;
+ info->format << GEN6_DEPTH_DW1_FORMAT__SHIFT;
if (info->z_img)
dw1 |= (info->z_img->bo_stride - 1) << GEN6_DEPTH_DW1_PITCH__SHIFT;
- if (info->hiz_enable || !info->z_img) {
+ if (info->hiz_vma || !info->z_img) {
dw1 |= GEN6_DEPTH_DW1_HIZ_ENABLE |
GEN6_DEPTH_DW1_SEPARATE_STENCIL;
}
@@ -471,8 +425,6 @@ zs_set_gen6_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
zs->depth[3] = dw4;
zs->depth[4] = 0;
- zs->depth_format = format;
-
return true;
}
@@ -481,8 +433,6 @@ zs_set_gen7_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
const struct ilo_dev *dev,
const struct ilo_state_zs_info *info)
{
- enum gen_surface_type type;
- enum gen_depth_format format;
uint16_t width, height, depth;
uint16_t array_base, view_extent;
uint32_t dw1, dw2, dw3, dw4, dw6;
@@ -495,20 +445,13 @@ zs_set_gen7_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
&view_extent))
return false;
- type = (info->is_cube_map) ? GEN6_SURFTYPE_CUBE :
- (info->z_img) ? get_gen6_surface_type(dev, info->z_img) :
- get_gen6_surface_type(dev, info->s_img);
-
- format = (info->z_img) ? get_gen6_depth_format(dev, info->z_img) :
- GEN6_ZFORMAT_D32_FLOAT;
-
- dw1 = type << GEN7_DEPTH_DW1_TYPE__SHIFT |
- format << GEN7_DEPTH_DW1_FORMAT__SHIFT;
+ dw1 = info->type << GEN7_DEPTH_DW1_TYPE__SHIFT |
+ info->format << GEN7_DEPTH_DW1_FORMAT__SHIFT;
if (info->z_img) {
if (!info->z_readonly)
dw1 |= GEN7_DEPTH_DW1_DEPTH_WRITE_ENABLE;
- if (info->hiz_enable)
+ if (info->hiz_vma)
dw1 |= GEN7_DEPTH_DW1_HIZ_ENABLE;
dw1 |= (info->z_img->bo_stride - 1) << GEN7_DEPTH_DW1_PITCH__SHIFT;
@@ -539,8 +482,6 @@ zs_set_gen7_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
zs->depth[3] = dw4;
zs->depth[4] = dw6;
- zs->depth_format = format;
-
return true;
}
@@ -683,11 +624,15 @@ ilo_state_zs_init(struct ilo_state_zs *zs, const struct ilo_dev *dev,
else
ret &= zs_set_gen6_null_3DSTATE_STENCIL_BUFFER(zs, dev);
- if (info->z_img && info->hiz_enable)
+ if (info->z_img && info->hiz_vma)
ret &= zs_set_gen6_3DSTATE_HIER_DEPTH_BUFFER(zs, dev, info);
else
ret &= zs_set_gen6_null_3DSTATE_HIER_DEPTH_BUFFER(zs, dev);
+ zs->z_vma = info->z_vma;
+ zs->s_vma = info->s_vma;
+ zs->hiz_vma = info->hiz_vma;
+
zs->z_readonly = info->z_readonly;
zs->s_readonly = info->s_readonly;
@@ -703,6 +648,8 @@ ilo_state_zs_init_for_null(struct ilo_state_zs *zs,
struct ilo_state_zs_info info;
memset(&info, 0, sizeof(info));
+ info.type = GEN6_SURFTYPE_NULL;
+ info.format = GEN6_ZFORMAT_D32_FLOAT;
return ilo_state_zs_init(zs, dev, &info);
}
@@ -720,8 +667,11 @@ ilo_state_zs_disable_hiz(struct ilo_state_zs *zs,
*/
assert(ilo_dev_gen(dev) >= ILO_GEN(7));
- zs->depth[0] &= ~GEN7_DEPTH_DW1_HIZ_ENABLE;
- zs_set_gen6_null_3DSTATE_HIER_DEPTH_BUFFER(zs, dev);
+ if (zs->hiz_vma) {
+ zs->depth[0] &= ~GEN7_DEPTH_DW1_HIZ_ENABLE;
+ zs_set_gen6_null_3DSTATE_HIER_DEPTH_BUFFER(zs, dev);
+ zs->hiz_vma = NULL;
+ }
return true;
}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_zs.h b/src/gallium/drivers/ilo/core/ilo_state_zs.h
index 98212daf74f..6a25a873897 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_zs.h
+++ b/src/gallium/drivers/ilo/core/ilo_state_zs.h
@@ -29,28 +29,31 @@
#define ILO_STATE_ZS_H
#include "genhw/genhw.h"
-#include "intel_winsys.h"
#include "ilo_core.h"
#include "ilo_dev.h"
+struct ilo_vma;
struct ilo_image;
struct ilo_state_zs_info {
- /* both are optional */
+ /* both optional */
const struct ilo_image *z_img;
const struct ilo_image *s_img;
+ uint8_t level;
+ uint16_t slice_base;
+ uint16_t slice_count;
+
+ const struct ilo_vma *z_vma;
+ const struct ilo_vma *s_vma;
+ const struct ilo_vma *hiz_vma;
+
+ enum gen_surface_type type;
+ enum gen_depth_format format;
/* ignored prior to Gen7 */
bool z_readonly;
bool s_readonly;
-
- bool hiz_enable;
- bool is_cube_map;
-
- uint8_t level;
- uint16_t slice_base;
- uint16_t slice_count;
};
struct ilo_state_zs {
@@ -58,16 +61,12 @@ struct ilo_state_zs {
uint32_t stencil[3];
uint32_t hiz[3];
- /* TODO move this to ilo_image */
- enum gen_depth_format depth_format;
+ const struct ilo_vma *z_vma;
+ const struct ilo_vma *s_vma;
+ const struct ilo_vma *hiz_vma;
bool z_readonly;
bool s_readonly;
-
- /* managed by users */
- struct intel_bo *depth_bo;
- struct intel_bo *stencil_bo;
- struct intel_bo *hiz_bo;
};
bool
@@ -83,11 +82,4 @@ bool
ilo_state_zs_disable_hiz(struct ilo_state_zs *zs,
const struct ilo_dev *dev);
-static inline enum gen_depth_format
-ilo_state_zs_get_depth_format(const struct ilo_state_zs *zs,
- const struct ilo_dev *dev)
-{
- return zs->depth_format;
-}
-
#endif /* ILO_STATE_ZS_H */
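With is_cube_map, hiz_enable, and the internal format lookup gone, callers now
resolve the surface type and depth format up front. A hedged sketch of filling
the reworked info struct (hypothetical `tex` and `dev`; uses the
ilo_format_translate_depth() helper added later in this patch):

   struct ilo_state_zs_info info;

   memset(&info, 0, sizeof(info));
   info.type = GEN6_SURFTYPE_2D;
   info.format = ilo_format_translate_depth(dev, PIPE_FORMAT_Z16_UNORM);
   info.z_img = &tex->image;
   info.z_vma = &tex->vma;
   /* a non-NULL hiz_vma is what enables HiZ now */
   if (ilo_image_can_enable_aux(&tex->image, 0))
      info.hiz_vma = &tex->aux_vma;
   info.level = 0;
   info.slice_base = 0;
   info.slice_count = 1;

   ilo_state_zs_init(&zs, dev, &info);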
diff --git a/src/gallium/drivers/ilo/core/ilo_buffer.h b/src/gallium/drivers/ilo/core/ilo_vma.h
index ca3c61ff890..ad2a1d4b33e 100644
--- a/src/gallium/drivers/ilo/core/ilo_buffer.h
+++ b/src/gallium/drivers/ilo/core/ilo_vma.h
@@ -1,7 +1,7 @@
/*
* Mesa 3-D graphics library
*
- * Copyright (C) 2012-2013 LunarG, Inc.
+ * Copyright (C) 2015 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -25,40 +25,49 @@
* Chia-I Wu <[email protected]>
*/
-#ifndef ILO_BUFFER_H
-#define ILO_BUFFER_H
-
-#include "intel_winsys.h"
+#ifndef ILO_VMA_H
+#define ILO_VMA_H
#include "ilo_core.h"
#include "ilo_debug.h"
#include "ilo_dev.h"
-struct ilo_buffer {
- unsigned bo_size;
+struct intel_bo;
+
+/**
+ * A virtual memory area.
+ */
+struct ilo_vma {
+ /* address space */
+ uint32_t vm_size;
+ uint32_t vm_alignment;
- /* managed by users */
+ /* backing storage */
struct intel_bo *bo;
+ uint32_t bo_offset;
};
-static inline void
-ilo_buffer_init(struct ilo_buffer *buf, const struct ilo_dev *dev,
- unsigned size, uint32_t bind, uint32_t flags)
+static inline bool
+ilo_vma_init(struct ilo_vma *vma, const struct ilo_dev *dev,
+ uint32_t size, uint32_t alignment)
{
- assert(ilo_is_zeroed(buf, sizeof(*buf)));
+ assert(ilo_is_zeroed(vma, sizeof(*vma)));
+ assert(size && alignment);
+
+ vma->vm_alignment = alignment;
+ vma->vm_size = size;
- buf->bo_size = size;
+ return true;
+}
+
+static inline void
+ilo_vma_set_bo(struct ilo_vma *vma, const struct ilo_dev *dev,
+ struct intel_bo *bo, uint32_t offset)
+{
+ assert(offset % vma->vm_alignment == 0);
- /*
- * From the Sandy Bridge PRM, volume 1 part 1, page 118:
- *
- * "For buffers, which have no inherent "height," padding requirements
- * are different. A buffer must be padded to the next multiple of 256
- * array elements, with an additional 16 bytes added beyond that to
- * account for the L1 cache line."
- */
- if (bind & PIPE_BIND_SAMPLER_VIEW)
- buf->bo_size = align(buf->bo_size, 256) + 16;
+ vma->bo = bo;
+ vma->bo_offset = offset;
}
-#endif /* ILO_BUFFER_H */
+#endif /* ILO_VMA_H */
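The vma abstraction decouples state objects from buffer objects: vm_size and
vm_alignment are fixed at init time and validated against, while the backing
bo (and its offset within that bo) can be swapped later without recomputing
state. A hedged sketch (`dev` and `bo` assumed):

   /* suballocate a 64 KB area out of a larger bo */
   struct ilo_vma vma;

   memset(&vma, 0, sizeof(vma));
   ilo_vma_init(&vma, dev, 64 * 1024, 4096);
   ilo_vma_set_bo(&vma, dev, bo, 4096);   /* offset must honor vm_alignment */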
diff --git a/src/gallium/drivers/ilo/ilo_blitter_blt.c b/src/gallium/drivers/ilo/ilo_blitter_blt.c
index d55dc35e360..66203e86137 100644
--- a/src/gallium/drivers/ilo/ilo_blitter_blt.c
+++ b/src/gallium/drivers/ilo/ilo_blitter_blt.c
@@ -127,7 +127,7 @@ ilo_blitter_blt_end(struct ilo_blitter *blitter, uint32_t swctrl)
static bool
buf_clear_region(struct ilo_blitter *blitter,
- struct ilo_buffer *buf, unsigned offset,
+ struct ilo_buffer_resource *buf, unsigned offset,
uint32_t val, unsigned size,
enum gen6_blt_mask value_mask,
enum gen6_blt_mask write_mask)
@@ -140,8 +140,8 @@ buf_clear_region(struct ilo_blitter *blitter,
if (offset % cpp || size % cpp)
return false;
- dst.bo = buf->bo;
- dst.offset = offset;
+ dst.bo = buf->vma.bo;
+ dst.offset = buf->vma.bo_offset + offset;
ilo_blitter_blt_begin(blitter, GEN6_COLOR_BLT__SIZE *
(1 + size / 32764 / gen6_blt_max_scanlines),
@@ -179,25 +179,26 @@ buf_clear_region(struct ilo_blitter *blitter,
static bool
buf_copy_region(struct ilo_blitter *blitter,
- struct ilo_buffer *dst_buf, unsigned dst_offset,
- struct ilo_buffer *src_buf, unsigned src_offset,
+ struct ilo_buffer_resource *dst_buf, unsigned dst_offset,
+ struct ilo_buffer_resource *src_buf, unsigned src_offset,
unsigned size)
{
const uint8_t rop = 0xcc; /* SRCCOPY */
struct ilo_builder *builder = &blitter->ilo->cp->builder;
struct gen6_blt_bo dst, src;
- dst.bo = dst_buf->bo;
- dst.offset = dst_offset;
+ dst.bo = dst_buf->vma.bo;
+ dst.offset = dst_buf->vma.bo_offset + dst_offset;
dst.pitch = 0;
- src.bo = src_buf->bo;
- src.offset = src_offset;
+ src.bo = src_buf->vma.bo;
+ src.offset = src_buf->vma.bo_offset + src_offset;
src.pitch = 0;
ilo_blitter_blt_begin(blitter, GEN6_SRC_COPY_BLT__SIZE *
(1 + size / 32764 / gen6_blt_max_scanlines),
- dst_buf->bo, GEN6_TILING_NONE, src_buf->bo, GEN6_TILING_NONE);
+ dst_buf->vma.bo, GEN6_TILING_NONE,
+ src_buf->vma.bo, GEN6_TILING_NONE);
while (size) {
unsigned width, height;
@@ -258,14 +259,14 @@ tex_clear_region(struct ilo_blitter *blitter,
if (dst_box->width * cpp > gen6_blt_max_bytes_per_scanline)
return false;
- dst.bo = dst_tex->image.bo;
- dst.offset = 0;
+ dst.bo = dst_tex->vma.bo;
+ dst.offset = dst_tex->vma.bo_offset;
dst.pitch = dst_tex->image.bo_stride;
dst.tiling = dst_tex->image.tiling;
swctrl = ilo_blitter_blt_begin(blitter,
GEN6_XY_COLOR_BLT__SIZE * dst_box->depth,
- dst_tex->image.bo, dst_tex->image.tiling, NULL, GEN6_TILING_NONE);
+ dst_tex->vma.bo, dst_tex->image.tiling, NULL, GEN6_TILING_NONE);
for (slice = 0; slice < dst_box->depth; slice++) {
unsigned x, y;
@@ -299,7 +300,7 @@ tex_copy_region(struct ilo_blitter *blitter,
const struct pipe_box *src_box)
{
const struct util_format_description *desc =
- util_format_description(dst_tex->image.format);
+ util_format_description(dst_tex->image_format);
const unsigned max_extent = 32767; /* INT16_MAX */
const uint8_t rop = 0xcc; /* SRCCOPY */
struct ilo_builder *builder = &blitter->ilo->cp->builder;
@@ -347,13 +348,13 @@ tex_copy_region(struct ilo_blitter *blitter,
break;
}
- dst.bo = dst_tex->image.bo;
- dst.offset = 0;
+ dst.bo = dst_tex->vma.bo;
+ dst.offset = dst_tex->vma.bo_offset;
dst.pitch = dst_tex->image.bo_stride;
dst.tiling = dst_tex->image.tiling;
- src.bo = src_tex->image.bo;
- src.offset = 0;
+ src.bo = src_tex->vma.bo;
+ src.offset = src_tex->vma.bo_offset;
src.pitch = src_tex->image.bo_stride;
src.tiling = src_tex->image.tiling;
@@ -423,8 +424,8 @@ ilo_blitter_blt_copy_resource(struct ilo_blitter *blitter,
src_box->height == 1 &&
src_box->depth == 1);
- success = buf_copy_region(blitter,
- ilo_buffer(dst), dst_offset, ilo_buffer(src), src_offset, size);
+ success = buf_copy_region(blitter, ilo_buffer_resource(dst), dst_offset,
+ ilo_buffer_resource(src), src_offset, size);
}
else if (dst->target != PIPE_BUFFER && src->target != PIPE_BUFFER) {
success = tex_copy_region(blitter,
@@ -488,7 +489,7 @@ ilo_blitter_blt_clear_rt(struct ilo_blitter *blitter,
if (offset + size > end)
size = end - offset;
- success = buf_clear_region(blitter, ilo_buffer(rt->texture),
+ success = buf_clear_region(blitter, ilo_buffer_resource(rt->texture),
offset, packed.ui[0], size, mask, mask);
}
else {
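With storage now described by an ilo_vma, every BLT destination and source in
this file folds the suballocation offset into the surface offset. The
invariant, in short (names as in the hunks above):

   dst.bo     = buf->vma.bo;
   dst.offset = buf->vma.bo_offset + offset;   /* was plain `offset` before */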
diff --git a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c
index 13c8f500680..86e67084d6e 100644
--- a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c
+++ b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c
@@ -318,7 +318,7 @@ hiz_can_clear_zs(const struct ilo_blitter *blitter,
* The truth is when HiZ is enabled, separate stencil is also enabled on
* all GENs. The depth buffer format cannot be combined depth/stencil.
*/
- switch (tex->image.format) {
+ switch (tex->image_format) {
case PIPE_FORMAT_Z16_UNORM:
if (ilo_dev_gen(blitter->ilo->dev) == ILO_GEN(6) &&
tex->base.width0 % 16)
@@ -355,7 +355,7 @@ ilo_blitter_rectlist_clear_zs(struct ilo_blitter *blitter,
if (ilo_dev_gen(blitter->ilo->dev) >= ILO_GEN(8))
clear_value = fui(depth);
else
- clear_value = util_pack_z(tex->image.format, depth);
+ clear_value = util_pack_z(tex->image_format, depth);
ilo_blit_resolve_surface(blitter->ilo, zs,
ILO_TEXTURE_RENDER_WRITE | ILO_TEXTURE_CLEAR);
diff --git a/src/gallium/drivers/ilo/ilo_common.h b/src/gallium/drivers/ilo/ilo_common.h
index 9ebbf76e81e..3dbe79fb872 100644
--- a/src/gallium/drivers/ilo/ilo_common.h
+++ b/src/gallium/drivers/ilo/ilo_common.h
@@ -28,6 +28,14 @@
#ifndef ILO_COMMON_H
#define ILO_COMMON_H
+#include "pipe/p_format.h"
+#include "pipe/p_defines.h"
+
+#include "util/list.h"
+#include "util/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_pointer.h"
+
#include "core/ilo_core.h"
#include "core/ilo_debug.h"
#include "core/ilo_dev.h"
diff --git a/src/gallium/drivers/ilo/ilo_context.c b/src/gallium/drivers/ilo/ilo_context.c
index 3d5c7b636a8..b9a16aab81d 100644
--- a/src/gallium/drivers/ilo/ilo_context.c
+++ b/src/gallium/drivers/ilo/ilo_context.c
@@ -62,6 +62,8 @@ ilo_flush(struct pipe_context *pipe,
(flags & PIPE_FLUSH_END_OF_FRAME) ? "frame end" : "user request");
if (f) {
+ struct pipe_screen *screen = pipe->screen;
+ screen->fence_reference(screen, f, NULL);
*f = ilo_screen_fence_create(pipe->screen, ilo->cp->last_submitted_bo);
}
}
diff --git a/src/gallium/drivers/ilo/ilo_draw.c b/src/gallium/drivers/ilo/ilo_draw.c
index e8e1a4cd14c..433348d9326 100644
--- a/src/gallium/drivers/ilo/ilo_draw.c
+++ b/src/gallium/drivers/ilo/ilo_draw.c
@@ -444,6 +444,7 @@ draw_vbo_with_sw_restart(struct ilo_context *ilo,
const struct pipe_draw_info *info)
{
const struct ilo_ib_state *ib = &ilo->state_vector.ib;
+ const struct ilo_vma *vma;
union {
const void *ptr;
const uint8_t *u8;
@@ -453,10 +454,12 @@ draw_vbo_with_sw_restart(struct ilo_context *ilo,
/* we will draw with IB mapped */
if (ib->state.buffer) {
- u.ptr = intel_bo_map(ilo_buffer(ib->state.buffer)->bo, false);
+ vma = ilo_resource_get_vma(ib->state.buffer);
+ u.ptr = intel_bo_map(vma->bo, false);
if (u.ptr)
- u.u8 += ib->state.offset;
+ u.u8 += vma->bo_offset + ib->state.offset;
} else {
+ vma = NULL;
u.ptr = ib->state.user_buffer;
}
@@ -500,8 +503,8 @@ draw_vbo_with_sw_restart(struct ilo_context *ilo,
#undef DRAW_VBO_WITH_SW_RESTART
- if (ib->state.buffer)
- intel_bo_unmap(ilo_buffer(ib->state.buffer)->bo);
+ if (vma)
+ intel_bo_unmap(vma->bo);
}
static bool
diff --git a/src/gallium/drivers/ilo/ilo_format.h b/src/gallium/drivers/ilo/ilo_format.h
index 4e955c09c14..0a19c02659e 100644
--- a/src/gallium/drivers/ilo/ilo_format.h
+++ b/src/gallium/drivers/ilo/ilo_format.h
@@ -165,4 +165,39 @@ ilo_format_translate_vertex(const struct ilo_dev *dev,
return ilo_format_translate(dev, format, PIPE_BIND_VERTEX_BUFFER);
}
+static inline enum gen_depth_format
+ilo_format_translate_depth(const struct ilo_dev *dev,
+ enum pipe_format format)
+{
+ if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+ switch (format) {
+ case PIPE_FORMAT_Z32_FLOAT:
+ return GEN6_ZFORMAT_D32_FLOAT;
+ case PIPE_FORMAT_Z24X8_UNORM:
+ return GEN6_ZFORMAT_D24_UNORM_X8_UINT;
+ case PIPE_FORMAT_Z16_UNORM:
+ return GEN6_ZFORMAT_D16_UNORM;
+ default:
+ assert(!"unknown depth format");
+ return GEN6_ZFORMAT_D32_FLOAT;
+ }
+ } else {
+ switch (format) {
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ return GEN6_ZFORMAT_D32_FLOAT_S8X24_UINT;
+ case PIPE_FORMAT_Z32_FLOAT:
+ return GEN6_ZFORMAT_D32_FLOAT;
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ return GEN6_ZFORMAT_D24_UNORM_S8_UINT;
+ case PIPE_FORMAT_Z24X8_UNORM:
+ return GEN6_ZFORMAT_D24_UNORM_X8_UINT;
+ case PIPE_FORMAT_Z16_UNORM:
+ return GEN6_ZFORMAT_D16_UNORM;
+ default:
+ assert(!"unknown depth format");
+ return GEN6_ZFORMAT_D32_FLOAT;
+ }
+ }
+}
+
#endif /* ILO_FORMAT_H */
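This inline absorbs the get_gen6_depth_format() helper removed from
ilo_state_zs.c; note the Gen split, e.g. (hedged, `dev` assumed):

   /* Gen6 still accepts combined depth/stencil formats */
   fmt = ilo_format_translate_depth(dev, PIPE_FORMAT_Z24_UNORM_S8_UINT);
      /* -> GEN6_ZFORMAT_D24_UNORM_S8_UINT on Gen6; asserts on Gen7+ */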
diff --git a/src/gallium/drivers/ilo/ilo_render_surface.c b/src/gallium/drivers/ilo/ilo_render_surface.c
index ad053564294..3bf8646b344 100644
--- a/src/gallium/drivers/ilo/ilo_render_surface.c
+++ b/src/gallium/drivers/ilo/ilo_render_surface.c
@@ -42,14 +42,17 @@ gen6_so_SURFACE_STATE(struct ilo_builder *builder,
const struct pipe_stream_output_info *so_info,
int so_index)
{
- struct ilo_buffer *buf = ilo_buffer(so->buffer);
struct ilo_state_surface_buffer_info info;
struct ilo_state_surface surf;
ILO_DEV_ASSERT(builder->dev, 6, 6);
memset(&info, 0, sizeof(info));
- info.buf = buf;
+
+ info.vma = ilo_resource_get_vma(so->buffer);
+ info.offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
+ info.size = so->buffer_size - so_info->output[so_index].dst_offset * 4;
+
info.access = ILO_STATE_SURFACE_ACCESS_DP_SVB;
switch (so_info->output[so_index].num_components) {
@@ -78,12 +81,9 @@ gen6_so_SURFACE_STATE(struct ilo_builder *builder,
info.struct_size =
so_info->stride[so_info->output[so_index].output_buffer] * 4;
- info.offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
- info.size = so->buffer_size - so_info->output[so_index].dst_offset * 4;
memset(&surf, 0, sizeof(surf));
ilo_state_surface_init_for_buffer(&surf, builder->dev, &info);
- surf.bo = info.buf->bo;
return gen6_SURFACE_STATE(builder, &surf);
}
@@ -482,18 +482,19 @@ gen6_emit_launch_grid_surface_const(struct ilo_render *r,
return;
memset(&info, 0, sizeof(info));
- info.buf = ilo_buffer(session->input->buffer);
+
+ info.vma = ilo_resource_get_vma(session->input->buffer);
+ info.offset = session->input->buffer_offset;
+ info.size = session->input->buffer_size;
+
info.access = ILO_STATE_SURFACE_ACCESS_DP_UNTYPED;
info.format = GEN6_FORMAT_RAW;
info.format_size = 1;
info.struct_size = 1;
info.readonly = true;
- info.offset = session->input->buffer_offset;
- info.size = session->input->buffer_size;
memset(&surf, 0, sizeof(surf));
ilo_state_surface_init_for_buffer(&surf, r->dev, &info);
- surf.bo = info.buf->bo;
assert(count == 1 && session->input->buffer);
surface_state[base] = gen6_SURFACE_STATE(r->builder, &surf);
@@ -538,23 +539,23 @@ gen6_emit_launch_grid_surface_global(struct ilo_render *r,
surface_state += base;
for (i = 0; i < count; i++) {
if (i < vec->global_binding.count && bindings[i].resource) {
- const struct ilo_buffer *buf = ilo_buffer(bindings[i].resource);
struct ilo_state_surface_buffer_info info;
struct ilo_state_surface surf;
assert(bindings[i].resource->target == PIPE_BUFFER);
memset(&info, 0, sizeof(info));
- info.buf = buf;
+
+ info.vma = ilo_resource_get_vma(bindings[i].resource);
+ info.size = info.vma->vm_size;
+
info.access = ILO_STATE_SURFACE_ACCESS_DP_UNTYPED;
info.format = GEN6_FORMAT_RAW;
info.format_size = 1;
info.struct_size = 1;
- info.size = buf->bo_size;
memset(&surf, 0, sizeof(surf));
ilo_state_surface_init_for_buffer(&surf, r->dev, &info);
- surf.bo = info.buf->bo;
surface_state[i] = gen6_SURFACE_STATE(r->builder, &surf);
} else {
diff --git a/src/gallium/drivers/ilo/ilo_resource.c b/src/gallium/drivers/ilo/ilo_resource.c
index be9fd10a84c..9026ba9a983 100644
--- a/src/gallium/drivers/ilo/ilo_resource.c
+++ b/src/gallium/drivers/ilo/ilo_resource.c
@@ -25,7 +25,12 @@
* Chia-I Wu <[email protected]>
*/
+#include "core/ilo_state_vf.h"
+#include "core/ilo_state_sol.h"
+#include "core/ilo_state_surface.h"
+
#include "ilo_screen.h"
+#include "ilo_format.h"
#include "ilo_resource.h"
/*
@@ -83,6 +88,134 @@ resource_get_cpu_init(const struct pipe_resource *templ)
PIPE_BIND_STREAM_OUTPUT)) ? false : true;
}
+static enum gen_surface_type
+get_surface_type(enum pipe_texture_target target)
+{
+ switch (target) {
+ case PIPE_TEXTURE_1D:
+ case PIPE_TEXTURE_1D_ARRAY:
+ return GEN6_SURFTYPE_1D;
+ case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_RECT:
+ case PIPE_TEXTURE_2D_ARRAY:
+ return GEN6_SURFTYPE_2D;
+ case PIPE_TEXTURE_3D:
+ return GEN6_SURFTYPE_3D;
+ case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ return GEN6_SURFTYPE_CUBE;
+ default:
+ assert(!"unknown texture target");
+ return GEN6_SURFTYPE_NULL;
+ }
+}
+
+static enum pipe_format
+resource_get_image_format(const struct pipe_resource *templ,
+ const struct ilo_dev *dev,
+ bool *separate_stencil_ret)
+{
+ enum pipe_format format = templ->format;
+ bool separate_stencil;
+
+ /* silently promote ETC1 */
+ if (templ->format == PIPE_FORMAT_ETC1_RGB8)
+ format = PIPE_FORMAT_R8G8B8X8_UNORM;
+
+ /* separate stencil buffers */
+ separate_stencil = false;
+ if ((templ->bind & PIPE_BIND_DEPTH_STENCIL) &&
+ util_format_is_depth_and_stencil(templ->format)) {
+ switch (templ->format) {
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ /* Gen6 requires HiZ to be available for all levels */
+ if (ilo_dev_gen(dev) >= ILO_GEN(7) || templ->last_level == 0) {
+ format = PIPE_FORMAT_Z32_FLOAT;
+ separate_stencil = true;
+ }
+ break;
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ format = PIPE_FORMAT_Z24X8_UNORM;
+ separate_stencil = true;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (separate_stencil_ret)
+ *separate_stencil_ret = separate_stencil;
+
+ return format;
+}
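Hedged examples of what the promotion rules above produce (assuming
PIPE_BIND_DEPTH_STENCIL is set for the Z formats; `templ` and `dev` assumed):

   /* PIPE_FORMAT_ETC1_RGB8            -> PIPE_FORMAT_R8G8B8X8_UNORM, no S8
    * PIPE_FORMAT_Z24_UNORM_S8_UINT    -> PIPE_FORMAT_Z24X8_UNORM, separate S8
    * PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, Gen6, mipmapped -> unchanged
    *    (interleaved), since Gen6 needs HiZ on every level to split */
   bool separate_s8;
   enum pipe_format fmt = resource_get_image_format(templ, dev, &separate_s8);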
+
+static inline enum gen_surface_format
+pipe_to_surface_format(const struct ilo_dev *dev, enum pipe_format format)
+{
+ switch (format) {
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ return GEN6_FORMAT_R32_FLOAT_X8X24_TYPELESS;
+ case PIPE_FORMAT_Z32_FLOAT:
+ return GEN6_FORMAT_R32_FLOAT;
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ case PIPE_FORMAT_Z24X8_UNORM:
+ return GEN6_FORMAT_R24_UNORM_X8_TYPELESS;
+ case PIPE_FORMAT_Z16_UNORM:
+ return GEN6_FORMAT_R16_UNORM;
+ case PIPE_FORMAT_S8_UINT:
+ return GEN6_FORMAT_R8_UINT;
+ default:
+ return ilo_format_translate_color(dev, format);
+ }
+}
+
+static void
+resource_get_image_info(const struct pipe_resource *templ,
+ const struct ilo_dev *dev,
+ enum pipe_format image_format,
+ struct ilo_image_info *info)
+{
+ memset(info, 0, sizeof(*info));
+
+ info->type = get_surface_type(templ->target);
+
+ info->format = pipe_to_surface_format(dev, image_format);
+ info->interleaved_stencil = util_format_is_depth_and_stencil(image_format);
+ info->is_integer = util_format_is_pure_integer(image_format);
+ info->compressed = util_format_is_compressed(image_format);
+ info->block_width = util_format_get_blockwidth(image_format);
+ info->block_height = util_format_get_blockheight(image_format);
+ info->block_size = util_format_get_blocksize(image_format);
+
+ info->width = templ->width0;
+ info->height = templ->height0;
+ info->depth = templ->depth0;
+ info->array_size = templ->array_size;
+ info->level_count = templ->last_level + 1;
+ info->sample_count = (templ->nr_samples) ? templ->nr_samples : 1;
+
+ info->aux_disable = (templ->usage == PIPE_USAGE_STAGING);
+
+ if (templ->bind & PIPE_BIND_LINEAR)
+ info->valid_tilings = 1 << GEN6_TILING_NONE;
+
+ /*
+ * Tiled images must be mapped via GTT to get a linear view. Prefer linear
+ * images when the image size is greater than one-fourth of the mappable
+ * aperture.
+ */
+ if (templ->bind & (PIPE_BIND_TRANSFER_WRITE | PIPE_BIND_TRANSFER_READ))
+ info->prefer_linear_threshold = dev->aperture_mappable / 4;
+
+ info->bind_surface_sampler = (templ->bind & PIPE_BIND_SAMPLER_VIEW);
+ info->bind_surface_dp_render = (templ->bind & PIPE_BIND_RENDER_TARGET);
+ info->bind_surface_dp_typed = (templ->bind &
+ (PIPE_BIND_SHADER_IMAGE | PIPE_BIND_COMPUTE_RESOURCE));
+ info->bind_zs = (templ->bind & PIPE_BIND_DEPTH_STENCIL);
+ info->bind_scanout = (templ->bind & PIPE_BIND_SCANOUT);
+ info->bind_cursor = (templ->bind & PIPE_BIND_CURSOR);
+}
+
static enum gen_surface_tiling
winsys_to_surface_tiling(enum intel_tiling_mode tiling)
{
@@ -178,8 +311,8 @@ tex_create_bo(struct ilo_texture *tex)
if (!bo)
return false;
- intel_bo_unref(tex->image.bo);
- tex->image.bo = bo;
+ intel_bo_unref(tex->vma.bo);
+ ilo_vma_set_bo(&tex->vma, &is->dev, bo, 0);
return true;
}
@@ -206,7 +339,7 @@ tex_create_separate_stencil(struct ilo_texture *tex)
tex->separate_s8 = ilo_texture(s8);
- assert(tex->separate_s8->image.format == PIPE_FORMAT_S8_UINT);
+ assert(tex->separate_s8->image_format == PIPE_FORMAT_S8_UINT);
return true;
}
@@ -215,15 +348,16 @@ static bool
tex_create_hiz(struct ilo_texture *tex)
{
const struct pipe_resource *templ = &tex->base;
+ const uint32_t size = tex->image.aux.bo_stride * tex->image.aux.bo_height;
struct ilo_screen *is = ilo_screen(tex->base.screen);
struct intel_bo *bo;
- bo = intel_winsys_alloc_bo(is->dev.winsys, "hiz texture",
- tex->image.aux.bo_stride * tex->image.aux.bo_height, false);
+ bo = intel_winsys_alloc_bo(is->dev.winsys, "hiz texture", size, false);
if (!bo)
return false;
- tex->image.aux.bo = bo;
+ ilo_vma_init(&tex->aux_vma, &is->dev, size, 4096);
+ ilo_vma_set_bo(&tex->aux_vma, &is->dev, bo, 0);
if (tex->imported) {
unsigned lv;
@@ -246,17 +380,18 @@ tex_create_hiz(struct ilo_texture *tex)
static bool
tex_create_mcs(struct ilo_texture *tex)
{
+ const uint32_t size = tex->image.aux.bo_stride * tex->image.aux.bo_height;
struct ilo_screen *is = ilo_screen(tex->base.screen);
struct intel_bo *bo;
assert(tex->image.aux.enables == (1 << (tex->base.last_level + 1)) - 1);
- bo = intel_winsys_alloc_bo(is->dev.winsys, "mcs texture",
- tex->image.aux.bo_stride * tex->image.aux.bo_height, false);
+ bo = intel_winsys_alloc_bo(is->dev.winsys, "mcs texture", size, false);
if (!bo)
return false;
- tex->image.aux.bo = bo;
+ ilo_vma_init(&tex->aux_vma, &is->dev, size, 4096);
+ ilo_vma_set_bo(&tex->aux_vma, &is->dev, bo, 0);
return true;
}
@@ -267,8 +402,8 @@ tex_destroy(struct ilo_texture *tex)
if (tex->separate_s8)
tex_destroy(tex->separate_s8);
- intel_bo_unref(tex->image.bo);
- intel_bo_unref(tex->image.aux.bo);
+ intel_bo_unref(tex->vma.bo);
+ intel_bo_unref(tex->aux_vma.bo);
tex_free_slices(tex);
FREE(tex);
@@ -277,24 +412,16 @@ tex_destroy(struct ilo_texture *tex)
static bool
tex_alloc_bos(struct ilo_texture *tex)
{
- struct ilo_screen *is = ilo_screen(tex->base.screen);
-
if (!tex->imported && !tex_create_bo(tex))
return false;
- /* allocate separate stencil resource */
- if (tex->image.separate_stencil && !tex_create_separate_stencil(tex))
- return false;
-
switch (tex->image.aux.type) {
case ILO_IMAGE_AUX_HIZ:
- if (!tex_create_hiz(tex) &&
- !ilo_image_disable_aux(&tex->image, &is->dev))
+ if (!tex_create_hiz(tex))
return false;
break;
case ILO_IMAGE_AUX_MCS:
- if (!tex_create_mcs(tex) &&
- !ilo_image_disable_aux(&tex->image, &is->dev))
+ if (!tex_create_mcs(tex))
return false;
break;
default:
@@ -304,9 +431,10 @@ tex_alloc_bos(struct ilo_texture *tex)
return true;
}
-static bool
+static struct intel_bo *
tex_import_handle(struct ilo_texture *tex,
- const struct winsys_handle *handle)
+ const struct winsys_handle *handle,
+ struct ilo_image_info *info)
{
struct ilo_screen *is = ilo_screen(tex->base.screen);
const struct pipe_resource *templ = &tex->base;
@@ -317,45 +445,94 @@ tex_import_handle(struct ilo_texture *tex,
bo = intel_winsys_import_handle(is->dev.winsys, name, handle,
tex->image.bo_height, &tiling, &pitch);
- if (!bo)
- return false;
+ /* modify image info */
+ if (bo) {
+ const uint8_t valid_tilings = 1 << winsys_to_surface_tiling(tiling);
- if (!ilo_image_init_for_imported(&tex->image, &is->dev, templ,
- winsys_to_surface_tiling(tiling), pitch)) {
- ilo_err("failed to import handle for texture\n");
- intel_bo_unref(bo);
- return false;
- }
+ if (info->valid_tilings && !(info->valid_tilings & valid_tilings)) {
+ intel_bo_unref(bo);
+ return NULL;
+ }
- tex->image.bo = bo;
+ info->valid_tilings = valid_tilings;
+ info->force_bo_stride = pitch;
- tex->imported = true;
+ /* assume imported RTs are also scanouts */
+ if (!info->bind_scanout)
+ info->bind_scanout = (templ->bind & PIPE_BIND_RENDER_TARGET);
+ }
- return true;
+ return bo;
}
static bool
tex_init_image(struct ilo_texture *tex,
- const struct winsys_handle *handle)
+ const struct winsys_handle *handle,
+ bool *separate_stencil)
{
struct ilo_screen *is = ilo_screen(tex->base.screen);
const struct pipe_resource *templ = &tex->base;
struct ilo_image *img = &tex->image;
+ struct intel_bo *imported_bo = NULL;
+ struct ilo_image_info info;
+
+ tex->image_format = resource_get_image_format(templ,
+ &is->dev, separate_stencil);
+ resource_get_image_info(templ, &is->dev, tex->image_format, &info);
if (handle) {
- if (!tex_import_handle(tex, handle))
+ imported_bo = tex_import_handle(tex, handle, &info);
+ if (!imported_bo)
return false;
- } else {
- ilo_image_init(img, &is->dev, templ);
}
- if (img->bo_height > ilo_max_resource_size / img->bo_stride)
+ if (!ilo_image_init(img, &is->dev, &info)) {
+ intel_bo_unref(imported_bo);
return false;
+ }
+
+ /*
+ * HiZ requires 8x4 alignment, and some levels might need HiZ disabled; that
+ * is generally fine, except on Gen6, where HiZ and separate stencil must be
+ * enabled together. For PIPE_FORMAT_Z24X8_UNORM with separate stencil, we
+ * can live with stencil values being interleaved for levels where HiZ is
+ * disabled. That is not the case for PIPE_FORMAT_Z32_FLOAT with separate
+ * stencil: if HiZ were disabled for a level, the format would have to
+ * change to PIPE_FORMAT_Z32_FLOAT_S8X24_UINT for that level, and that
+ * format has a different bpp. In other words, HiZ has to be available for
+ * all levels.
+ */
+ if (ilo_dev_gen(&is->dev) == ILO_GEN(6) &&
+ templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
+ tex->image_format == PIPE_FORMAT_Z32_FLOAT &&
+ img->aux.enables != (1 << templ->last_level)) {
+ tex->image_format = templ->format;
+ info.format = pipe_to_surface_format(&is->dev, tex->image_format);
+ info.interleaved_stencil = true;
+
+ memset(img, 0, sizeof(*img));
+ if (!ilo_image_init(img, &is->dev, &info)) {
+ intel_bo_unref(imported_bo);
+ return false;
+ }
+ }
+
+ if (img->bo_height > ilo_max_resource_size / img->bo_stride ||
+ !ilo_vma_init(&tex->vma, &is->dev, img->bo_stride * img->bo_height,
+ 4096)) {
+ intel_bo_unref(imported_bo);
+ return false;
+ }
+
+ if (imported_bo) {
+ ilo_vma_set_bo(&tex->vma, &is->dev, imported_bo, 0);
+ tex->imported = true;
+ }
if (templ->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) {
/* require on-the-fly tiling/untiling or format conversion */
- if (img->tiling == GEN8_TILING_W || img->separate_stencil ||
- img->format != templ->format)
+ if (img->tiling == GEN8_TILING_W || *separate_stencil ||
+ tex->image_format != templ->format)
return false;
}
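
One detail worth noting in tex_init_image() above: the resource-size check divides instead of multiplying. bo_stride * bo_height could overflow before being compared against ilo_max_resource_size, so the limit is divided by the stride. A self-contained equivalent of that test:

#include <stdbool.h>
#include <stdint.h>

/* overflow-safe form of "stride * height <= max", as used above */
static bool
fits_resource_limit(uint32_t bo_stride, uint32_t bo_height, uint32_t max_size)
{
   return bo_stride != 0 && bo_height <= max_size / bo_stride;
}
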
@@ -371,6 +548,7 @@ tex_create(struct pipe_screen *screen,
const struct winsys_handle *handle)
{
struct ilo_texture *tex;
+ bool separate_stencil;
tex = CALLOC_STRUCT(ilo_texture);
if (!tex)
@@ -380,12 +558,13 @@ tex_create(struct pipe_screen *screen,
tex->base.screen = screen;
pipe_reference_init(&tex->base.reference, 1);
- if (!tex_init_image(tex, handle)) {
+ if (!tex_init_image(tex, handle, &separate_stencil)) {
FREE(tex);
return NULL;
}
- if (!tex_alloc_bos(tex)) {
+ if (!tex_alloc_bos(tex) ||
+ (separate_stencil && !tex_create_separate_stencil(tex))) {
tex_destroy(tex);
return NULL;
}
@@ -406,7 +585,7 @@ tex_get_handle(struct ilo_texture *tex, struct winsys_handle *handle)
else
tiling = surface_to_winsys_tiling(tex->image.tiling);
- err = intel_winsys_export_handle(is->dev.winsys, tex->image.bo, tiling,
+ err = intel_winsys_export_handle(is->dev.winsys, tex->vma.bo, tiling,
tex->image.bo_stride, tex->image.bo_height, handle);
return !err;
@@ -420,13 +599,12 @@ buf_create_bo(struct ilo_buffer_resource *buf)
const bool cpu_init = resource_get_cpu_init(&buf->base);
struct intel_bo *bo;
- bo = intel_winsys_alloc_bo(is->dev.winsys, name,
- buf->buffer.bo_size, cpu_init);
+ bo = intel_winsys_alloc_bo(is->dev.winsys, name, buf->bo_size, cpu_init);
if (!bo)
return false;
- intel_bo_unref(buf->buffer.bo);
- buf->buffer.bo = bo;
+ intel_bo_unref(buf->vma.bo);
+ ilo_vma_set_bo(&buf->vma, &is->dev, bo, 0);
return true;
}
@@ -434,7 +612,7 @@ buf_create_bo(struct ilo_buffer_resource *buf)
static void
buf_destroy(struct ilo_buffer_resource *buf)
{
- intel_bo_unref(buf->buffer.bo);
+ intel_bo_unref(buf->vma.bo);
FREE(buf);
}
@@ -443,6 +621,7 @@ buf_create(struct pipe_screen *screen, const struct pipe_resource *templ)
{
const struct ilo_screen *is = ilo_screen(screen);
struct ilo_buffer_resource *buf;
+ uint32_t alignment;
unsigned size;
buf = CALLOC_STRUCT(ilo_buffer_resource);
@@ -471,10 +650,17 @@ buf_create(struct pipe_screen *screen, const struct pipe_resource *templ)
ilo_dev_gen(&is->dev) < ILO_GEN(7.5))
size = align(size, 4096);
- ilo_buffer_init(&buf->buffer, &is->dev, size, templ->bind, templ->flags);
+ if (templ->bind & PIPE_BIND_VERTEX_BUFFER)
+ size = ilo_state_vertex_buffer_size(&is->dev, size, &alignment);
+ if (templ->bind & PIPE_BIND_INDEX_BUFFER)
+ size = ilo_state_index_buffer_size(&is->dev, size, &alignment);
+ if (templ->bind & PIPE_BIND_STREAM_OUTPUT)
+ size = ilo_state_sol_buffer_size(&is->dev, size, &alignment);
+
+ buf->bo_size = size;
+ ilo_vma_init(&buf->vma, &is->dev, buf->bo_size, 4096);
- if (buf->buffer.bo_size < templ->width0 ||
- buf->buffer.bo_size > ilo_max_resource_size ||
+ if (buf->bo_size < templ->width0 || buf->bo_size > ilo_max_resource_size ||
!buf_create_bo(buf)) {
FREE(buf);
return NULL;
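
The align() call above is the usual power-of-two round-up from Mesa's utility headers; the new per-binding *_buffer_size() helpers then pad the size further and report the alignment each binding point needs. The round-up itself, as a standalone sketch (assuming 'to' is a power of two):

#include <stdint.h>

/* round x up to the next multiple of the power-of-two 'to' */
static inline uint32_t
align_u32(uint32_t x, uint32_t to)
{
   return (x + to - 1) & ~(to - 1);
}
/* e.g. align_u32(4097, 4096) == 8192, align_u32(4096, 4096) == 4096 */
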
@@ -487,13 +673,30 @@ static boolean
ilo_can_create_resource(struct pipe_screen *screen,
const struct pipe_resource *templ)
{
+ struct ilo_screen *is = ilo_screen(screen);
+ enum pipe_format image_format;
+ struct ilo_image_info info;
struct ilo_image img;
if (templ->target == PIPE_BUFFER)
return (templ->width0 <= ilo_max_resource_size);
+ image_format = resource_get_image_format(templ, &is->dev, NULL);
+ resource_get_image_info(templ, &is->dev, image_format, &info);
+
memset(&img, 0, sizeof(img));
- ilo_image_init(&img, &ilo_screen(screen)->dev, templ);
+ ilo_image_init(&img, &ilo_screen(screen)->dev, &info);
+
+ /* as in tex_init_image() */
+ if (ilo_dev_gen(&is->dev) == ILO_GEN(6) &&
+ templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
+ image_format == PIPE_FORMAT_Z32_FLOAT &&
+ img.aux.enables != (1 << templ->last_level)) {
+ info.format = pipe_to_surface_format(&is->dev, templ->format);
+ info.interleaved_stencil = true;
+ memset(&img, 0, sizeof(img));
+ ilo_image_init(&img, &ilo_screen(screen)->dev, &info);
+ }
return (img.bo_height <= ilo_max_resource_size / img.bo_stride);
}
diff --git a/src/gallium/drivers/ilo/ilo_resource.h b/src/gallium/drivers/ilo/ilo_resource.h
index d602e0cbf70..8378af54741 100644
--- a/src/gallium/drivers/ilo/ilo_resource.h
+++ b/src/gallium/drivers/ilo/ilo_resource.h
@@ -29,8 +29,8 @@
#define ILO_RESOURCE_H
#include "core/intel_winsys.h"
-#include "core/ilo_buffer.h"
#include "core/ilo_image.h"
+#include "core/ilo_vma.h"
#include "ilo_common.h"
#include "ilo_screen.h"
@@ -92,7 +92,10 @@ struct ilo_texture {
bool imported;
+ enum pipe_format image_format;
struct ilo_image image;
+ struct ilo_vma vma;
+ struct ilo_vma aux_vma;
/* XXX thread-safety */
struct ilo_texture_slice *slices[PIPE_MAX_TEXTURE_LEVELS];
@@ -103,14 +106,15 @@ struct ilo_texture {
struct ilo_buffer_resource {
struct pipe_resource base;
- struct ilo_buffer buffer;
+ uint32_t bo_size;
+ struct ilo_vma vma;
};
-static inline struct ilo_buffer *
-ilo_buffer(struct pipe_resource *res)
+static inline struct ilo_buffer_resource *
+ilo_buffer_resource(struct pipe_resource *res)
{
- return (res && res->target == PIPE_BUFFER) ?
- &((struct ilo_buffer_resource *) res)->buffer : NULL;
+ return (struct ilo_buffer_resource *)
+ ((res && res->target == PIPE_BUFFER) ? res : NULL);
}
static inline struct ilo_texture *
@@ -127,13 +131,14 @@ bool
ilo_resource_rename_bo(struct pipe_resource *res);
/**
- * Return the bo of the resource.
+ * Return the VMA of the resource.
*/
-static inline struct intel_bo *
-ilo_resource_get_bo(struct pipe_resource *res)
+static inline const struct ilo_vma *
+ilo_resource_get_vma(struct pipe_resource *res)
{
return (res->target == PIPE_BUFFER) ?
- ilo_buffer(res)->bo : ilo_texture(res)->image.bo;
+ &((struct ilo_buffer_resource *) res)->vma :
+ &((struct ilo_texture *) res)->vma;
}
static inline struct ilo_texture_slice *
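
The rewritten ilo_buffer_resource() and ilo_resource_get_vma() casts are valid only because the pipe_resource 'base' is the first member of both wrapper structs, so a pointer to the wrapper and a pointer to its base coincide. A small demonstration of that invariant, with hypothetical struct names:

#include <assert.h>
#include <stddef.h>

struct base_res { int target; };
struct wrapper  { struct base_res base; int extra; };

int main(void)
{
   struct wrapper w = { { 0 }, 42 };
   /* valid because offsetof(struct wrapper, base) == 0 */
   assert((void *) &w == (void *) &w.base);
   assert(((struct wrapper *) &w.base)->extra == 42);
   return 0;
}
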
diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c
index 94105559b80..ab4d1377c9f 100644
--- a/src/gallium/drivers/ilo/ilo_screen.c
+++ b/src/gallium/drivers/ilo/ilo_screen.c
@@ -193,6 +193,7 @@ ilo_get_compute_param(struct pipe_screen *screen,
uint32_t max_clock_frequency;
uint32_t max_compute_units;
uint32_t images_supported;
+ uint32_t subgroup_size;
} val;
const void *ptr;
int size;
@@ -284,6 +285,13 @@ ilo_get_compute_param(struct pipe_screen *screen,
ptr = &val.images_supported;
size = sizeof(val.images_supported);
break;
+ case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+ /* best case is actually SIMD32 */
+ val.subgroup_size = 16;
+
+ ptr = &val.subgroup_size;
+ size = sizeof(val.subgroup_size);
+ break;
default:
ptr = NULL;
size = 0;
@@ -443,6 +451,8 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_GATHER_SM5:
return 0;
case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
return true;
case PIPE_CAP_FAKE_SW_MSAA:
case PIPE_CAP_TEXTURE_QUERY_LOD:
@@ -457,6 +467,8 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
return 0;
case PIPE_CAP_VENDOR_ID:
@@ -665,13 +677,6 @@ ilo_screen_fence_finish(struct pipe_screen *screen,
return signaled;
}
-static boolean
-ilo_screen_fence_signalled(struct pipe_screen *screen,
- struct pipe_fence_handle *fence)
-{
- return ilo_screen_fence_finish(screen, fence, 0);
-}
-
/**
* Create a fence for \p bo. When \p bo is not NULL, it must be submitted
* before waited on or checked.
@@ -738,7 +743,6 @@ ilo_screen_create(struct intel_winsys *ws)
is->base.flush_frontbuffer = NULL;
is->base.fence_reference = ilo_screen_fence_reference;
- is->base.fence_signalled = ilo_screen_fence_signalled;
is->base.fence_finish = ilo_screen_fence_finish;
is->base.get_driver_query_info = NULL;
diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c
index 63534f33fa7..d89765a9d23 100644
--- a/src/gallium/drivers/ilo/ilo_state.c
+++ b/src/gallium/drivers/ilo/ilo_state.c
@@ -379,13 +379,12 @@ finalize_cbuf_state(struct ilo_context *ilo,
u_upload_data(ilo->uploader, 0, cbuf->cso[i].info.size,
cbuf->cso[i].user_buffer, &offset, &cbuf->cso[i].resource);
- cbuf->cso[i].info.buf = ilo_buffer(cbuf->cso[i].resource);
+ cbuf->cso[i].info.vma = ilo_resource_get_vma(cbuf->cso[i].resource);
cbuf->cso[i].info.offset = offset;
memset(&cbuf->cso[i].surface, 0, sizeof(cbuf->cso[i].surface));
ilo_state_surface_init_for_buffer(&cbuf->cso[i].surface,
ilo->dev, &cbuf->cso[i].info);
- cbuf->cso[i].surface.bo = cbuf->cso[i].info.buf->bo;
ilo->state_vector.dirty |= ILO_DIRTY_CBUF;
}
@@ -466,11 +465,9 @@ finalize_index_buffer(struct ilo_context *ilo)
memset(&info, 0, sizeof(info));
if (vec->ib.hw_resource) {
- info.buf = ilo_buffer(vec->ib.hw_resource);
- info.size = info.buf->bo_size;
+ info.vma = ilo_resource_get_vma(vec->ib.hw_resource);
+ info.size = info.vma->vm_size;
info.format = ilo_translate_index_size(vec->ib.hw_index_size);
-
- vec->ib.ib.bo = info.buf->bo;
}
ilo_state_index_buffer_set_info(&vec->ib.ib, dev, &info);
@@ -532,13 +529,11 @@ finalize_vertex_buffers(struct ilo_context *ilo)
const struct pipe_vertex_buffer *cso = &vec->vb.states[pipe_idx];
if (cso->buffer) {
- info.buf = ilo_buffer(cso->buffer);
+ info.vma = ilo_resource_get_vma(cso->buffer);
info.offset = cso->buffer_offset;
- info.size = info.buf->bo_size;
+ info.size = info.vma->vm_size - cso->buffer_offset;
info.stride = cso->stride;
-
- vec->vb.vb[i].bo = info.buf->bo;
} else {
memset(&info, 0, sizeof(info));
}
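
Also note the size change in finalize_vertex_buffers(): the old code exposed the whole buffer (info.buf->bo_size), while the new code exposes only the range past buffer_offset, i.e. vm_size - buffer_offset. A guarded sketch of that computation (the driver presumably validates the offset elsewhere):

#include <stdint.h>

/* bytes addressable past 'offset' within a range of 'vm_size' bytes */
static uint32_t
range_size(uint32_t vm_size, uint32_t offset)
{
   return (offset < vm_size) ? vm_size - offset : 0;
}
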
@@ -1566,24 +1561,23 @@ ilo_set_constant_buffer(struct pipe_context *pipe,
cso->info.size = buf[i].buffer_size;
if (buf[i].buffer) {
- cso->info.buf = ilo_buffer(buf[i].buffer);
+ cso->info.vma = ilo_resource_get_vma(buf[i].buffer);
cso->info.offset = buf[i].buffer_offset;
memset(&cso->surface, 0, sizeof(cso->surface));
ilo_state_surface_init_for_buffer(&cso->surface, dev, &cso->info);
- cso->surface.bo = cso->info.buf->bo;
cso->user_buffer = NULL;
cbuf->enabled_mask |= 1 << (index + i);
} else if (buf[i].user_buffer) {
- cso->info.buf = NULL;
+ cso->info.vma = NULL;
/* buffer_offset does not apply for user buffer */
cso->user_buffer = buf[i].user_buffer;
cbuf->enabled_mask |= 1 << (index + i);
} else {
- cso->info.buf = NULL;
+ cso->info.vma = NULL;
cso->info.size = 0;
cso->user_buffer = NULL;
@@ -1596,7 +1590,7 @@ ilo_set_constant_buffer(struct pipe_context *pipe,
pipe_resource_reference(&cso->resource, NULL);
- cso->info.buf = NULL;
+ cso->info.vma = NULL;
cso->info.size = 0;
cso->user_buffer = NULL;
@@ -1705,10 +1699,11 @@ ilo_set_framebuffer_state(struct pipe_context *pipe,
if (state->zsbuf) {
const struct ilo_surface_cso *cso =
(const struct ilo_surface_cso *) state->zsbuf;
+ const struct ilo_texture *tex = ilo_texture(cso->base.texture);
- fb->has_hiz = cso->u.zs.hiz_bo;
+ fb->has_hiz = cso->u.zs.hiz_vma;
fb->depth_offset_format =
- ilo_state_zs_get_depth_format(&cso->u.zs, dev);
+ ilo_format_translate_depth(dev, tex->image_format);
} else {
fb->has_hiz = false;
fb->depth_offset_format = GEN6_ZFORMAT_D32_FLOAT;
@@ -1854,10 +1849,11 @@ ilo_set_sampler_views(struct pipe_context *pipe, unsigned shader,
}
static void
-ilo_set_shader_resources(struct pipe_context *pipe,
- unsigned start, unsigned count,
- struct pipe_surface **surfaces)
+ilo_set_shader_images(struct pipe_context *pipe, unsigned shader,
+ unsigned start, unsigned count,
+ struct pipe_image_view **views)
{
+#if 0
struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
struct ilo_resource_state *dst = &vec->resource;
unsigned i;
@@ -1886,6 +1882,7 @@ ilo_set_shader_resources(struct pipe_context *pipe,
}
vec->dirty |= ILO_DIRTY_RESOURCE;
+#endif
}
static void
@@ -1945,12 +1942,11 @@ ilo_create_stream_output_target(struct pipe_context *pipe,
target->base.buffer_size = buffer_size;
memset(&info, 0, sizeof(info));
- info.buf = ilo_buffer(res);
+ info.vma = ilo_resource_get_vma(res);
info.offset = buffer_offset;
info.size = buffer_size;
ilo_state_sol_buffer_init(&target->sb, dev, &info);
- target->sb.bo = info.buf->bo;
return &target->base;
}
@@ -2018,18 +2014,17 @@ ilo_create_sampler_view(struct pipe_context *pipe,
struct ilo_state_surface_buffer_info info;
memset(&info, 0, sizeof(info));
- info.buf = ilo_buffer(res);
+      info.vma = ilo_resource_get_vma(res);
      info.access = ILO_STATE_SURFACE_ACCESS_SAMPLER;
      info.format = ilo_format_translate_color(dev, templ->format);
      info.format_size = util_format_get_blocksize(templ->format);
      info.struct_size = info.format_size;
+      info.offset = templ->u.buf.first_element * info.struct_size;
+      info.size = (templ->u.buf.last_element -
+            templ->u.buf.first_element + 1) * info.struct_size;
      info.readonly = true;
-      info.offset = templ->u.buf.first_element * info.struct_size;
-      info.size = (templ->u.buf.last_element -
-            templ->u.buf.first_element + 1) * info.struct_size;
ilo_state_surface_init_for_buffer(&view->surface, dev, &info);
- view->surface.bo = info.buf->bo;
} else {
struct ilo_texture *tex = ilo_texture(res);
struct ilo_state_surface_image_info info;
@@ -2042,32 +2037,31 @@ ilo_create_sampler_view(struct pipe_context *pipe,
}
memset(&info, 0, sizeof(info));
+
info.img = &tex->image;
+ info.level_base = templ->u.tex.first_level;
+ info.level_count = templ->u.tex.last_level -
+ templ->u.tex.first_level + 1;
+ info.slice_base = templ->u.tex.first_layer;
+ info.slice_count = templ->u.tex.last_layer -
+ templ->u.tex.first_layer + 1;
+ info.vma = &tex->vma;
info.access = ILO_STATE_SURFACE_ACCESS_SAMPLER;
+ info.type = tex->image.type;
if (templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
- tex->image.separate_stencil) {
+ tex->separate_s8) {
info.format = ilo_format_translate_texture(dev,
PIPE_FORMAT_Z32_FLOAT);
} else {
info.format = ilo_format_translate_texture(dev, templ->format);
}
- info.is_cube_map = (tex->image.target == PIPE_TEXTURE_CUBE ||
- tex->image.target == PIPE_TEXTURE_CUBE_ARRAY);
info.is_array = util_resource_is_array_texture(&tex->base);
info.readonly = true;
- info.level_base = templ->u.tex.first_level;
- info.level_count = templ->u.tex.last_level -
- templ->u.tex.first_level + 1;
- info.slice_base = templ->u.tex.first_layer;
- info.slice_count = templ->u.tex.last_layer -
- templ->u.tex.first_layer + 1;
-
ilo_state_surface_init_for_image(&view->surface, dev, &info);
- view->surface.bo = info.img->bo;
}
return &view->base;
@@ -2111,18 +2105,27 @@ ilo_create_surface(struct pipe_context *pipe,
assert(tex->base.target != PIPE_BUFFER);
memset(&info, 0, sizeof(info));
+
info.img = &tex->image;
- info.access = ILO_STATE_SURFACE_ACCESS_DP_RENDER;
- info.format = ilo_format_translate_render(dev, templ->format);
- info.is_array = util_resource_is_array_texture(&tex->base);
info.level_base = templ->u.tex.level;
info.level_count = 1;
info.slice_base = templ->u.tex.first_layer;
info.slice_count = templ->u.tex.last_layer -
templ->u.tex.first_layer + 1;
+ info.vma = &tex->vma;
+ if (ilo_image_can_enable_aux(&tex->image, templ->u.tex.level))
+ info.aux_vma = &tex->aux_vma;
+
+ info.access = ILO_STATE_SURFACE_ACCESS_DP_RENDER;
+
+ info.type = (tex->image.type == GEN6_SURFTYPE_CUBE) ?
+ GEN6_SURFTYPE_2D : tex->image.type;
+
+ info.format = ilo_format_translate_render(dev, templ->format);
+ info.is_array = util_resource_is_array_texture(&tex->base);
+
ilo_state_surface_init_for_image(&surf->u.rt, dev, &info);
- surf->u.rt.bo = info.img->bo;
} else {
struct ilo_state_zs_info info;
@@ -2131,13 +2134,19 @@ ilo_create_surface(struct pipe_context *pipe,
memset(&info, 0, sizeof(info));
if (templ->format == PIPE_FORMAT_S8_UINT) {
+ info.s_vma = &tex->vma;
info.s_img = &tex->image;
} else {
+ info.z_vma = &tex->vma;
info.z_img = &tex->image;
- info.s_img = (tex->separate_s8) ? &tex->separate_s8->image : NULL;
- info.hiz_enable =
- ilo_image_can_enable_aux(&tex->image, templ->u.tex.level);
+ if (tex->separate_s8) {
+ info.s_vma = &tex->separate_s8->vma;
+ info.s_img = &tex->separate_s8->image;
+ }
+
+ if (ilo_image_can_enable_aux(&tex->image, templ->u.tex.level))
+ info.hiz_vma = &tex->aux_vma;
}
info.level = templ->u.tex.level;
@@ -2145,16 +2154,15 @@ ilo_create_surface(struct pipe_context *pipe,
info.slice_count = templ->u.tex.last_layer -
templ->u.tex.first_layer + 1;
- ilo_state_zs_init(&surf->u.zs, dev, &info);
+ info.type = (tex->image.type == GEN6_SURFTYPE_CUBE) ?
+ GEN6_SURFTYPE_2D : tex->image.type;
- if (info.z_img) {
- surf->u.zs.depth_bo = info.z_img->bo;
- if (info.hiz_enable)
- surf->u.zs.hiz_bo = info.z_img->aux.bo;
- }
+ info.format = ilo_format_translate_depth(dev, tex->image_format);
+ if (ilo_dev_gen(dev) == ILO_GEN(6) && !info.hiz_vma &&
+ tex->image_format == PIPE_FORMAT_Z24X8_UNORM)
+ info.format = GEN6_ZFORMAT_D24_UNORM_S8_UINT;
- if (info.s_img)
- surf->u.zs.stencil_bo = info.s_img->bo;
+ ilo_state_zs_init(&surf->u.zs, dev, &info);
}
return &surf->base;
@@ -2339,7 +2347,7 @@ ilo_init_state_functions(struct ilo_context *ilo)
ilo->base.set_scissor_states = ilo_set_scissor_states;
ilo->base.set_viewport_states = ilo_set_viewport_states;
ilo->base.set_sampler_views = ilo_set_sampler_views;
- ilo->base.set_shader_resources = ilo_set_shader_resources;
+ ilo->base.set_shader_images = ilo_set_shader_images;
ilo->base.set_vertex_buffers = ilo_set_vertex_buffers;
ilo->base.set_index_buffer = ilo_set_index_buffer;
@@ -2451,7 +2459,6 @@ void
ilo_state_vector_resource_renamed(struct ilo_state_vector *vec,
struct pipe_resource *res)
{
- struct intel_bo *bo = ilo_resource_get_bo(res);
uint32_t states = 0;
unsigned sh, i;
@@ -2482,10 +2489,6 @@ ilo_state_vector_resource_renamed(struct ilo_state_vector *vec,
for (i = 0; i < vec->so.count; i++) {
if (vec->so.states[i]->buffer == res) {
- struct ilo_stream_output_target *target =
- (struct ilo_stream_output_target *) vec->so.states[i];
-
- target->sb.bo = ilo_buffer(res)->bo;
states |= ILO_DIRTY_SO;
break;
}
@@ -2503,7 +2506,6 @@ ilo_state_vector_resource_renamed(struct ilo_state_vector *vec,
[PIPE_SHADER_GEOMETRY] = ILO_DIRTY_VIEW_GS,
[PIPE_SHADER_COMPUTE] = ILO_DIRTY_VIEW_CS,
};
- cso->surface.bo = bo;
states |= view_dirty_bits[sh];
break;
@@ -2515,7 +2517,6 @@ ilo_state_vector_resource_renamed(struct ilo_state_vector *vec,
struct ilo_cbuf_cso *cbuf = &vec->cbuf[sh].cso[i];
if (cbuf->resource == res) {
- cbuf->surface.bo = bo;
states |= ILO_DIRTY_CBUF;
break;
}
@@ -2528,7 +2529,6 @@ ilo_state_vector_resource_renamed(struct ilo_state_vector *vec,
(struct ilo_surface_cso *) vec->resource.states[i];
if (cso->base.texture == res) {
- cso->u.rt.bo = bo;
states |= ILO_DIRTY_RESOURCE;
break;
}
@@ -2540,27 +2540,19 @@ ilo_state_vector_resource_renamed(struct ilo_state_vector *vec,
struct ilo_surface_cso *cso =
(struct ilo_surface_cso *) vec->fb.state.cbufs[i];
if (cso && cso->base.texture == res) {
- cso->u.rt.bo = bo;
states |= ILO_DIRTY_FB;
break;
}
}
- if (vec->fb.state.zsbuf && vec->fb.state.zsbuf->texture == res) {
- struct ilo_surface_cso *cso =
- (struct ilo_surface_cso *) vec->fb.state.zsbuf;
-
- cso->u.zs.depth_bo = bo;
-
+ if (vec->fb.state.zsbuf && vec->fb.state.zsbuf->texture == res)
states |= ILO_DIRTY_FB;
- }
}
for (i = 0; i < vec->cs_resource.count; i++) {
struct ilo_surface_cso *cso =
(struct ilo_surface_cso *) vec->cs_resource.states[i];
if (cso->base.texture == res) {
- cso->u.rt.bo = bo;
states |= ILO_DIRTY_CS_RESOURCE;
break;
}
diff --git a/src/gallium/drivers/ilo/ilo_state.h b/src/gallium/drivers/ilo/ilo_state.h
index 3e6fd8a2554..66c93007eb1 100644
--- a/src/gallium/drivers/ilo/ilo_state.h
+++ b/src/gallium/drivers/ilo/ilo_state.h
@@ -202,7 +202,7 @@ struct ilo_cbuf_state {
};
struct ilo_resource_state {
- struct pipe_surface *states[PIPE_MAX_SHADER_RESOURCES];
+ struct pipe_surface *states[PIPE_MAX_SHADER_IMAGES];
unsigned count;
};
diff --git a/src/gallium/drivers/ilo/ilo_transfer.c b/src/gallium/drivers/ilo/ilo_transfer.c
index ec41473f94a..5abd3bebf68 100644
--- a/src/gallium/drivers/ilo/ilo_transfer.c
+++ b/src/gallium/drivers/ilo/ilo_transfer.c
@@ -100,7 +100,7 @@ resource_get_transfer_method(struct pipe_resource *res,
m = ILO_TRANSFER_MAP_SW_ZS;
need_convert = true;
}
- } else if (tex->image.format != tex->base.format) {
+ } else if (tex->image_format != tex->base.format) {
m = ILO_TRANSFER_MAP_SW_CONVERT;
need_convert = true;
}
@@ -268,23 +268,27 @@ xfer_alloc_staging_sys(struct ilo_transfer *xfer)
static void *
xfer_map(struct ilo_transfer *xfer)
{
+ const struct ilo_vma *vma;
void *ptr;
switch (xfer->method) {
case ILO_TRANSFER_MAP_CPU:
- ptr = intel_bo_map(ilo_resource_get_bo(xfer->base.resource),
- xfer->base.usage & PIPE_TRANSFER_WRITE);
+ vma = ilo_resource_get_vma(xfer->base.resource);
+ ptr = intel_bo_map(vma->bo, xfer->base.usage & PIPE_TRANSFER_WRITE);
break;
case ILO_TRANSFER_MAP_GTT:
- ptr = intel_bo_map_gtt(ilo_resource_get_bo(xfer->base.resource));
+ vma = ilo_resource_get_vma(xfer->base.resource);
+ ptr = intel_bo_map_gtt(vma->bo);
break;
case ILO_TRANSFER_MAP_GTT_ASYNC:
- ptr = intel_bo_map_gtt_async(ilo_resource_get_bo(xfer->base.resource));
+ vma = ilo_resource_get_vma(xfer->base.resource);
+ ptr = intel_bo_map_gtt_async(vma->bo);
break;
case ILO_TRANSFER_MAP_STAGING:
{
const struct ilo_screen *is = ilo_screen(xfer->staging.res->screen);
- struct intel_bo *bo = ilo_resource_get_bo(xfer->staging.res);
+
+ vma = ilo_resource_get_vma(xfer->staging.res);
/*
* We want a writable, optionally persistent and coherent, mapping
@@ -292,25 +296,29 @@ xfer_map(struct ilo_transfer *xfer)
* this turns out to be fairly simple.
*/
if (is->dev.has_llc)
- ptr = intel_bo_map(bo, true);
+ ptr = intel_bo_map(vma->bo, true);
else
- ptr = intel_bo_map_gtt(bo);
+ ptr = intel_bo_map_gtt(vma->bo);
if (ptr && xfer->staging.res->target == PIPE_BUFFER)
ptr += (xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT);
-
}
break;
case ILO_TRANSFER_MAP_SW_CONVERT:
case ILO_TRANSFER_MAP_SW_ZS:
+ vma = NULL;
ptr = xfer->staging.sys;
break;
default:
assert(!"unknown mapping method");
+ vma = NULL;
ptr = NULL;
break;
}
+ if (ptr && vma)
+ ptr = (void *) ((char *) ptr + vma->bo_offset);
+
return ptr;
}
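
The new tail of xfer_map() is the crux of this refactor: every GPU mapping is now biased by the vma's offset within the bo, while the software-staging paths (which set vma to NULL) pass through untouched. The added pattern, in isolation:

#include <stddef.h>

/* bias a successful bo mapping by the range's offset within the bo;
 * NULL (failed map, or CPU staging memory) passes through unchanged */
static void *
bias_mapping(void *ptr, size_t bo_offset)
{
   return ptr ? (char *) ptr + bo_offset : NULL;
}
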
@@ -324,10 +332,10 @@ xfer_unmap(struct ilo_transfer *xfer)
case ILO_TRANSFER_MAP_CPU:
case ILO_TRANSFER_MAP_GTT:
case ILO_TRANSFER_MAP_GTT_ASYNC:
- intel_bo_unmap(ilo_resource_get_bo(xfer->base.resource));
+ intel_bo_unmap(ilo_resource_get_vma(xfer->base.resource)->bo);
break;
case ILO_TRANSFER_MAP_STAGING:
- intel_bo_unmap(ilo_resource_get_bo(xfer->staging.res));
+ intel_bo_unmap(ilo_resource_get_vma(xfer->staging.res)->bo);
break;
default:
break;
@@ -541,9 +549,12 @@ tex_staging_sys_map_bo(struct ilo_texture *tex,
if (prefer_cpu && (tex->image.tiling == GEN6_TILING_NONE ||
!linear_view))
- ptr = intel_bo_map(tex->image.bo, !for_read_back);
+ ptr = intel_bo_map(tex->vma.bo, !for_read_back);
else
- ptr = intel_bo_map_gtt(tex->image.bo);
+ ptr = intel_bo_map_gtt(tex->vma.bo);
+
+ if (ptr)
+ ptr = (void *) ((char *) ptr + tex->vma.bo_offset);
return ptr;
}
@@ -551,7 +562,7 @@ tex_staging_sys_map_bo(struct ilo_texture *tex,
static void
tex_staging_sys_unmap_bo(struct ilo_texture *tex)
{
- intel_bo_unmap(tex->image.bo);
+ intel_bo_unmap(tex->vma.bo);
}
static bool
@@ -590,7 +601,7 @@ tex_staging_sys_zs_read(struct ilo_texture *tex,
s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);
if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
- assert(tex->image.format == PIPE_FORMAT_Z24X8_UNORM);
+ assert(tex->image_format == PIPE_FORMAT_Z24X8_UNORM);
dst_cpp = 4;
dst_s8_pos = 3;
@@ -598,7 +609,7 @@ tex_staging_sys_zs_read(struct ilo_texture *tex,
}
else {
assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
- assert(tex->image.format == PIPE_FORMAT_Z32_FLOAT);
+ assert(tex->image_format == PIPE_FORMAT_Z32_FLOAT);
dst_cpp = 8;
dst_s8_pos = 4;
@@ -644,7 +655,7 @@ tex_staging_sys_zs_read(struct ilo_texture *tex,
tex_staging_sys_unmap_bo(s8_tex);
}
else {
- assert(tex->image.format == PIPE_FORMAT_S8_UINT);
+ assert(tex->image_format == PIPE_FORMAT_S8_UINT);
for (slice = 0; slice < box->depth; slice++) {
unsigned mem_x, mem_y;
@@ -717,7 +728,7 @@ tex_staging_sys_zs_write(struct ilo_texture *tex,
s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);
if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
- assert(tex->image.format == PIPE_FORMAT_Z24X8_UNORM);
+ assert(tex->image_format == PIPE_FORMAT_Z24X8_UNORM);
src_cpp = 4;
src_s8_pos = 3;
@@ -725,7 +736,7 @@ tex_staging_sys_zs_write(struct ilo_texture *tex,
}
else {
assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
- assert(tex->image.format == PIPE_FORMAT_Z32_FLOAT);
+ assert(tex->image_format == PIPE_FORMAT_Z32_FLOAT);
src_cpp = 8;
src_s8_pos = 4;
@@ -771,7 +782,7 @@ tex_staging_sys_zs_write(struct ilo_texture *tex,
tex_staging_sys_unmap_bo(s8_tex);
}
else {
- assert(tex->image.format == PIPE_FORMAT_S8_UINT);
+ assert(tex->image_format == PIPE_FORMAT_S8_UINT);
for (slice = 0; slice < box->depth; slice++) {
unsigned mem_x, mem_y;
@@ -829,8 +840,8 @@ tex_staging_sys_convert_write(struct ilo_texture *tex,
else
dst_slice_stride = 0;
- if (unlikely(tex->image.format == tex->base.format)) {
- util_copy_box(dst, tex->image.format, tex->image.bo_stride,
+ if (unlikely(tex->image_format == tex->base.format)) {
+ util_copy_box(dst, tex->image_format, tex->image.bo_stride,
dst_slice_stride, 0, 0, 0, box->width, box->height, box->depth,
xfer->staging.sys, xfer->base.stride, xfer->base.layer_stride,
0, 0, 0);
@@ -842,7 +853,7 @@ tex_staging_sys_convert_write(struct ilo_texture *tex,
switch (tex->base.format) {
case PIPE_FORMAT_ETC1_RGB8:
- assert(tex->image.format == PIPE_FORMAT_R8G8B8X8_UNORM);
+ assert(tex->image_format == PIPE_FORMAT_R8G8B8X8_UNORM);
for (slice = 0; slice < box->depth; slice++) {
const void *src =
@@ -1055,7 +1066,7 @@ choose_transfer_method(struct ilo_context *ilo, struct ilo_transfer *xfer)
return false;
/* see if we can avoid blocking */
- if (is_bo_busy(ilo, ilo_resource_get_bo(res), &need_submit)) {
+ if (is_bo_busy(ilo, ilo_resource_get_vma(res)->bo, &need_submit)) {
bool resource_renamed;
if (!xfer_unblock(xfer, &resource_renamed)) {
@@ -1078,11 +1089,11 @@ static void
buf_pwrite(struct ilo_context *ilo, struct pipe_resource *res,
unsigned usage, int offset, int size, const void *data)
{
- struct ilo_buffer *buf = ilo_buffer(res);
+ struct ilo_buffer_resource *buf = ilo_buffer_resource(res);
bool need_submit;
/* see if we can avoid blocking */
- if (is_bo_busy(ilo, buf->bo, &need_submit)) {
+ if (is_bo_busy(ilo, buf->vma.bo, &need_submit)) {
bool unblocked = false;
if ((usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
@@ -1103,9 +1114,12 @@ buf_pwrite(struct ilo_context *ilo, struct pipe_resource *res,
templ.bind = PIPE_BIND_TRANSFER_WRITE;
staging = ilo->base.screen->resource_create(ilo->base.screen, &templ);
if (staging) {
+ const struct ilo_vma *staging_vma = ilo_resource_get_vma(staging);
struct pipe_box staging_box;
- intel_bo_pwrite(ilo_buffer(staging)->bo, 0, size, data);
+ /* offset by staging_vma->bo_offset for pwrite */
+ intel_bo_pwrite(staging_vma->bo, staging_vma->bo_offset,
+ size, data);
u_box_1d(0, size, &staging_box);
ilo_blitter_blt_copy_resource(ilo->blitter,
@@ -1123,7 +1137,8 @@ buf_pwrite(struct ilo_context *ilo, struct pipe_resource *res,
ilo_cp_submit(ilo->cp, "syncing for pwrites");
}
- intel_bo_pwrite(buf->bo, offset, size, data);
+ /* offset by buf->vma.bo_offset for pwrite */
+ intel_bo_pwrite(buf->vma.bo, buf->vma.bo_offset + offset, size, data);
}
static void
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.c b/src/gallium/drivers/llvmpipe/lp_bld_blend.c
index 1de43f77ee0..1feb415c9e5 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.c
@@ -78,7 +78,7 @@ lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func)
/**
* Whether the blending factors are complementary of each other.
*/
-static INLINE boolean
+static inline boolean
lp_build_blend_factor_complementary(unsigned src_factor, unsigned dst_factor)
{
return dst_factor == (src_factor ^ 0x10);
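
The XOR trick in lp_build_blend_factor_complementary() leans on how gallium numbers blend factors: in the p_defines.h of this era, each PIPE_BLENDFACTOR_INV_* value is its non-inverted counterpart with bit 0x10 set (and ZERO, 0x11, is likewise the complement of ONE, 0x01). A quick check, with the enum values copied here under that assumption:

#include <assert.h>

enum {
   EX_BLENDFACTOR_ONE           = 0x01,
   EX_BLENDFACTOR_SRC_ALPHA     = 0x03,
   EX_BLENDFACTOR_ZERO          = 0x11, /* complement of ONE */
   EX_BLENDFACTOR_INV_SRC_ALPHA = 0x13, /* complement of SRC_ALPHA */
};

int main(void)
{
   assert((EX_BLENDFACTOR_ONE ^ 0x10) == EX_BLENDFACTOR_ZERO);
   assert((EX_BLENDFACTOR_SRC_ALPHA ^ 0x10) == EX_BLENDFACTOR_INV_SRC_ALPHA);
   return 0;
}
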
diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
index 0d47c0d517c..c273b25f096 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_context.h
@@ -169,7 +169,7 @@ llvmpipe_user_buffer_create(struct pipe_screen *screen,
unsigned bind_flags);
-static INLINE struct llvmpipe_context *
+static inline struct llvmpipe_context *
llvmpipe_context( struct pipe_context *pipe )
{
return (struct llvmpipe_context *)pipe;
diff --git a/src/gallium/drivers/llvmpipe/lp_debug.h b/src/gallium/drivers/llvmpipe/lp_debug.h
index e0f7d8e1bc3..1038c5fe151 100644
--- a/src/gallium/drivers/llvmpipe/lp_debug.h
+++ b/src/gallium/drivers/llvmpipe/lp_debug.h
@@ -71,7 +71,7 @@ extern int LP_DEBUG;
void st_debug_init( void );
-static INLINE void
+static inline void
LP_DBG( unsigned flag, const char *fmt, ... )
{
if (LP_DEBUG & flag)
diff --git a/src/gallium/drivers/llvmpipe/lp_fence.h b/src/gallium/drivers/llvmpipe/lp_fence.h
index 3c591187801..d7f0c153ec8 100644
--- a/src/gallium/drivers/llvmpipe/lp_fence.h
+++ b/src/gallium/drivers/llvmpipe/lp_fence.h
@@ -72,7 +72,7 @@ llvmpipe_init_screen_fence_funcs(struct pipe_screen *screen);
void
lp_fence_destroy(struct lp_fence *fence);
-static INLINE void
+static inline void
lp_fence_reference(struct lp_fence **ptr,
struct lp_fence *f)
{
@@ -85,7 +85,7 @@ lp_fence_reference(struct lp_fence **ptr,
*ptr = f;
}
-static INLINE boolean
+static inline boolean
lp_fence_issued(const struct lp_fence *fence)
{
return fence->issued;
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h
index c209f47f0f5..c19f9318006 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -184,7 +184,7 @@ union lp_rast_cmd_arg {
/* Cast wrappers. Hopefully these compile to noops!
*/
-static INLINE union lp_rast_cmd_arg
+static inline union lp_rast_cmd_arg
lp_rast_arg_inputs( const struct lp_rast_shader_inputs *shade_tile )
{
union lp_rast_cmd_arg arg;
@@ -192,7 +192,7 @@ lp_rast_arg_inputs( const struct lp_rast_shader_inputs *shade_tile )
return arg;
}
-static INLINE union lp_rast_cmd_arg
+static inline union lp_rast_cmd_arg
lp_rast_arg_triangle( const struct lp_rast_triangle *triangle,
unsigned plane_mask)
{
@@ -208,7 +208,7 @@ lp_rast_arg_triangle( const struct lp_rast_triangle *triangle,
* All planes are enabled, so instead of the plane mask we pass the upper
* left coordinates of the a block that fully encloses the triangle.
*/
-static INLINE union lp_rast_cmd_arg
+static inline union lp_rast_cmd_arg
lp_rast_arg_triangle_contained( const struct lp_rast_triangle *triangle,
unsigned x, unsigned y)
{
@@ -218,7 +218,7 @@ lp_rast_arg_triangle_contained( const struct lp_rast_triangle *triangle,
return arg;
}
-static INLINE union lp_rast_cmd_arg
+static inline union lp_rast_cmd_arg
lp_rast_arg_state( const struct lp_rast_state *state )
{
union lp_rast_cmd_arg arg;
@@ -226,7 +226,7 @@ lp_rast_arg_state( const struct lp_rast_state *state )
return arg;
}
-static INLINE union lp_rast_cmd_arg
+static inline union lp_rast_cmd_arg
lp_rast_arg_fence( struct lp_fence *fence )
{
union lp_rast_cmd_arg arg;
@@ -235,7 +235,7 @@ lp_rast_arg_fence( struct lp_fence *fence )
}
-static INLINE union lp_rast_cmd_arg
+static inline union lp_rast_cmd_arg
lp_rast_arg_clearzs( uint64_t value, uint64_t mask )
{
union lp_rast_cmd_arg arg;
@@ -245,7 +245,7 @@ lp_rast_arg_clearzs( uint64_t value, uint64_t mask )
}
-static INLINE union lp_rast_cmd_arg
+static inline union lp_rast_cmd_arg
lp_rast_arg_query( struct llvmpipe_query *pq )
{
union lp_rast_cmd_arg arg;
@@ -253,7 +253,7 @@ lp_rast_arg_query( struct llvmpipe_query *pq )
return arg;
}
-static INLINE union lp_rast_cmd_arg
+static inline union lp_rast_cmd_arg
lp_rast_arg_null( void )
{
union lp_rast_cmd_arg arg;
@@ -312,7 +312,7 @@ lp_debug_draw_bins_by_coverage( struct lp_scene *scene );
#include <emmintrin.h>
#include "util/u_sse.h"
-static INLINE __m128i
+static inline __m128i
lp_plane_to_m128i(const struct lp_rast_plane *plane)
{
return _mm_setr_epi32((int32_t)plane->c, (int32_t)plane->dcdx,
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
index e6ebbcd526d..9aa7e874657 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -145,7 +145,7 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
* Get the pointer to a 4x4 color block (within a 64x64 tile).
* \param x, y location of 4x4 block in window coords
*/
-static INLINE uint8_t *
+static inline uint8_t *
lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task,
unsigned buf, unsigned x, unsigned y,
unsigned layer)
@@ -186,7 +186,7 @@ lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task,
* Get the pointer to a 4x4 depth block (within a 64x64 tile).
* \param x, y location of 4x4 block in window coords
*/
-static INLINE uint8_t *
+static inline uint8_t *
lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task,
unsigned x, unsigned y, unsigned layer)
{
@@ -222,7 +222,7 @@ lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task,
* triangle in/out tests.
* \param x, y location of 4x4 block in window coords
*/
-static INLINE void
+static inline void
lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
const struct lp_rast_shader_inputs *inputs,
unsigned x, unsigned y )
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
index 41f6fbfa059..c9b9221d87c 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@@ -63,7 +63,7 @@ block_full_16(struct lp_rasterizer_task *task,
block_full_4(task, tri, x + ix, y + iy);
}
-static INLINE unsigned
+static inline unsigned
build_mask_linear(int64_t c, int64_t dcdx, int64_t dcdy)
{
unsigned mask = 0;
@@ -94,7 +94,7 @@ build_mask_linear(int64_t c, int64_t dcdx, int64_t dcdy)
}
-static INLINE void
+static inline void
build_masks(int64_t c,
int64_t cdiff,
int64_t dcdx,
@@ -167,7 +167,7 @@ lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task,
#include "util/u_sse.h"
-static INLINE void
+static inline void
build_masks_32(int c,
int cdiff,
int dcdx,
@@ -213,7 +213,7 @@ build_masks_32(int c,
}
-static INLINE unsigned
+static inline unsigned
build_mask_linear_32(int c, int dcdx, int dcdy)
{
__m128i cstep0 = _mm_setr_epi32(c, c+dcdx, c+dcdx*2, c+dcdx*3);
@@ -239,7 +239,7 @@ build_mask_linear_32(int c, int dcdx, int dcdy)
return _mm_movemask_epi8(result);
}
-static INLINE unsigned
+static inline unsigned
sign_bits4(const __m128i *cstep, int cdiff)
{
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h
index a226ff0c485..b1464bb54c4 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene.h
+++ b/src/gallium/drivers/llvmpipe/lp_scene.h
@@ -207,7 +207,7 @@ boolean lp_scene_is_resource_referenced(const struct lp_scene *scene,
* Allocate space for a command/data in the bin's data buffer.
* Grow the block list if needed.
*/
-static INLINE void *
+static inline void *
lp_scene_alloc( struct lp_scene *scene, unsigned size)
{
struct data_block_list *list = &scene->data;
@@ -240,7 +240,7 @@ lp_scene_alloc( struct lp_scene *scene, unsigned size)
/**
* As above, but with specific alignment.
*/
-static INLINE void *
+static inline void *
lp_scene_alloc_aligned( struct lp_scene *scene, unsigned size,
unsigned alignment )
{
@@ -272,7 +272,7 @@ lp_scene_alloc_aligned( struct lp_scene *scene, unsigned size,
/* Put back data if we decide not to use it, eg. culled triangles.
*/
-static INLINE void
+static inline void
lp_scene_putback_data( struct lp_scene *scene, unsigned size)
{
struct data_block_list *list = &scene->data;
@@ -282,7 +282,7 @@ lp_scene_putback_data( struct lp_scene *scene, unsigned size)
/** Return pointer to a particular tile's bin. */
-static INLINE struct cmd_bin *
+static inline struct cmd_bin *
lp_scene_get_bin(struct lp_scene *scene, unsigned x, unsigned y)
{
return &scene->tile[x][y];
@@ -296,7 +296,7 @@ lp_scene_bin_reset(struct lp_scene *scene, unsigned x, unsigned y);
/* Add a command to bin[x][y].
*/
-static INLINE boolean
+static inline boolean
lp_scene_bin_command( struct lp_scene *scene,
unsigned x, unsigned y,
unsigned cmd,
@@ -328,7 +328,7 @@ lp_scene_bin_command( struct lp_scene *scene,
}
-static INLINE boolean
+static inline boolean
lp_scene_bin_cmd_with_state( struct lp_scene *scene,
unsigned x, unsigned y,
const struct lp_rast_state *state,
@@ -354,7 +354,7 @@ lp_scene_bin_cmd_with_state( struct lp_scene *scene,
/* Add a command to all active bins.
*/
-static INLINE boolean
+static inline boolean
lp_scene_bin_everywhere( struct lp_scene *scene,
unsigned cmd,
const union lp_rast_cmd_arg arg )
@@ -371,7 +371,7 @@ lp_scene_bin_everywhere( struct lp_scene *scene,
}
-static INLINE unsigned
+static inline unsigned
lp_scene_get_num_bins( const struct lp_scene *scene )
{
return scene->tiles_x * scene->tiles_y;
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 47f1897c732..14eeab03387 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -288,10 +288,14 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_VERTEXID_NOBASE:
return 0;
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
return 1;
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
return 0;
}
/* should only get here on unhandled cases */
@@ -529,18 +533,6 @@ llvmpipe_fence_reference(struct pipe_screen *screen,
/**
- * Has the fence been executed/finished?
- */
-static boolean
-llvmpipe_fence_signalled(struct pipe_screen *screen,
- struct pipe_fence_handle *fence)
-{
- struct lp_fence *f = (struct lp_fence *) fence;
- return lp_fence_signalled(f);
-}
-
-
-/**
* Wait for the fence to finish.
*/
static boolean
@@ -550,6 +542,9 @@ llvmpipe_fence_finish(struct pipe_screen *screen,
{
struct lp_fence *f = (struct lp_fence *) fence_handle;
+ if (!timeout)
+ return lp_fence_signalled(f);
+
lp_fence_wait(f);
return TRUE;
}
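
With fence_signalled removed, its behavior folds into fence_finish: a timeout of 0 now means "poll without blocking", which is exactly what the added early return implements. A caller that previously used fence_signalled would now do something like this (a sketch, assuming the fence_finish(screen, fence, timeout) vtable signature seen in this diff):

/* non-blocking poll via the consolidated interface */
static boolean
fence_is_signalled(struct pipe_screen *screen, struct pipe_fence_handle *f)
{
   return screen->fence_finish(screen, f, 0);
}
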
@@ -601,7 +596,6 @@ llvmpipe_create_screen(struct sw_winsys *winsys)
screen->base.context_create = llvmpipe_create_context;
screen->base.flush_frontbuffer = llvmpipe_flush_frontbuffer;
screen->base.fence_reference = llvmpipe_fence_reference;
- screen->base.fence_signalled = llvmpipe_fence_signalled;
screen->base.fence_finish = llvmpipe_fence_finish;
screen->base.get_timestamp = llvmpipe_get_timestamp;
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.h b/src/gallium/drivers/llvmpipe/lp_screen.h
index 8b8ea1afac9..00bf20c8c5f 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.h
+++ b/src/gallium/drivers/llvmpipe/lp_screen.h
@@ -62,7 +62,7 @@ struct llvmpipe_screen
-static INLINE struct llvmpipe_screen *
+static inline struct llvmpipe_screen *
llvmpipe_screen( struct pipe_screen *pipe )
{
return (struct llvmpipe_screen *)pipe;
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h
index c944ad26756..a42df2dc9e0 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup.h
@@ -159,7 +159,7 @@ void
lp_setup_end_query(struct lp_setup_context *setup,
struct llvmpipe_query *pq);
-static INLINE unsigned
+static inline unsigned
lp_clamp_viewport_idx(int idx)
{
return (PIPE_MAX_VIEWPORTS > idx && idx >= 0) ? idx : 0;
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c
index 6c05b90e64a..a190254d9df 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_line.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c
@@ -233,7 +233,7 @@ static void setup_line_coefficients( struct lp_setup_context *setup,
-static INLINE int subpixel_snap( float a )
+static inline int subpixel_snap( float a )
{
return util_iround(FIXED_ONE * a);
}
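
subpixel_snap() converts floating-point window coordinates onto llvmpipe's fixed-point rasterization grid by scaling and rounding. Assuming 4 fractional bits (FIXED_ONE == 16 -- the actual value may differ by branch), the behavior is:

#include <assert.h>
#include <math.h>

#define EX_FIXED_ONE 16 /* assumed: 4 fractional bits */

static int
ex_subpixel_snap(float a)
{
   /* round to nearest; util_iround differs only at exact ties */
   return (int) lrintf(EX_FIXED_ONE * a);
}

int main(void)
{
   assert(ex_subpixel_snap(1.0f)  == 16);
   assert(ex_subpixel_snap(0.25f) == 4);
   assert(ex_subpixel_snap(-0.5f) == -8);
   return 0;
}
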
@@ -262,14 +262,14 @@ print_line(struct lp_setup_context *setup,
}
-static INLINE boolean sign(float x){
+static inline boolean sign(float x){
return x >= 0;
}
/* Used on positive floats only:
*/
-static INLINE float fracf(float f)
+static inline float fracf(float f)
{
return f - floorf(f);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c
index f065676a7fb..75544b52493 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_point.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c
@@ -296,7 +296,7 @@ setup_point_coefficients( struct lp_setup_context *setup,
}
-static INLINE int
+static inline int
subpixel_snap(float a)
{
return util_iround(FIXED_ONE * a);
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index a2f55ed3a1e..98a9d4bc28b 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -48,13 +48,13 @@
#include <emmintrin.h>
#endif
-static INLINE int
+static inline int
subpixel_snap(float a)
{
return util_iround(FIXED_ONE * a);
}
-static INLINE float
+static inline float
fixed_to_float(int a)
{
return a * (1.0f / FIXED_ONE);
@@ -579,7 +579,7 @@ do_triangle_ccw(struct lp_setup_context *setup,
*
* Undefined if no bit set exists, so code should check against 0 first.
*/
-static INLINE uint32_t
+static inline uint32_t
floor_pot(uint32_t n)
{
#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86)
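
floor_pot() returns the largest power of two not exceeding n (per the comment above, behavior is undefined for n == 0 on the bsr path). The x86 branch uses the bsr instruction; a portable equivalent smears the top set bit downward and then keeps only that bit:

#include <assert.h>
#include <stdint.h>

static uint32_t
ex_floor_pot(uint32_t n)
{
   /* propagate the highest set bit into every lower position... */
   n |= n >> 1;
   n |= n >> 2;
   n |= n >> 4;
   n |= n >> 8;
   n |= n >> 16;
   /* ...then drop everything below it */
   return n - (n >> 1);
}

int main(void)
{
   assert(ex_floor_pot(1)  == 1);
   assert(ex_floor_pot(40) == 32);
   assert(ex_floor_pot(64) == 64);
   return 0;
}
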
@@ -841,7 +841,7 @@ static void retry_triangle_ccw( struct lp_setup_context *setup,
/**
* Calculate fixed position data for a triangle
*/
-static INLINE void
+static inline void
calc_fixed_position( struct lp_setup_context *setup,
struct fixed_position* position,
const float (*v0)[4],
@@ -873,7 +873,7 @@ calc_fixed_position( struct lp_setup_context *setup,
 * Rotate a triangle, flipping its clockwise direction;
 * swaps values for xy[0] and xy[1].
*/
-static INLINE void
+static inline void
rotate_fixed_position_01( struct fixed_position* position )
{
int x, y;
@@ -898,7 +898,7 @@ rotate_fixed_position_01( struct fixed_position* position )
 * Rotate a triangle, flipping its clockwise direction;
 * swaps values for xy[1] and xy[2].
*/
-static INLINE void
+static inline void
rotate_fixed_position_12( struct fixed_position* position )
{
int x, y;
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
index 89992007849..534c5f48a64 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
@@ -122,7 +122,7 @@ lp_setup_set_primitive(struct vbuf_render *vbr, unsigned prim)
typedef const float (*const_float4_ptr)[4];
-static INLINE const_float4_ptr get_vert( const void *vertex_buffer,
+static inline const_float4_ptr get_vert( const void *vertex_buffer,
int index,
int stride )
{
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index b5ce8683f1a..fd6c49aacd8 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -840,7 +840,7 @@ store_unswizzled_block(struct gallivm_state *gallivm,
*
* A format which has irregular channel sizes such as R3_G3_B2 or R5_G6_B5.
*/
-static INLINE boolean
+static inline boolean
is_arithmetic_format(const struct util_format_description *format_desc)
{
boolean arith = false;
@@ -860,7 +860,7 @@ is_arithmetic_format(const struct util_format_description *format_desc)
* to floats for blending, and furthermore has "natural" packed AoS -> unpacked
* SoA conversion.
*/
-static INLINE boolean
+static inline boolean
format_expands_to_float_soa(const struct util_format_description *format_desc)
{
if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT ||
@@ -876,7 +876,7 @@ format_expands_to_float_soa(const struct util_format_description *format_desc)
*
* e.g. RGBA16F = 4x half-float and R3G3B2 = 1x byte
*/
-static INLINE void
+static inline void
lp_mem_type_from_format_desc(const struct util_format_description *format_desc,
struct lp_type* type)
{
@@ -924,7 +924,7 @@ lp_mem_type_from_format_desc(const struct util_format_description *format_desc,
*
* e.g. RGBA16F = 4x float, R3G3B2 = 3x byte
*/
-static INLINE void
+static inline void
lp_blend_type_from_format_desc(const struct util_format_description *format_desc,
struct lp_type* type)
{
@@ -996,7 +996,7 @@ lp_blend_type_from_format_desc(const struct util_format_description *format_desc
*
* but we try to avoid division and multiplication through shifts.
*/
-static INLINE LLVMValueRef
+static inline LLVMValueRef
scale_bits(struct gallivm_state *gallivm,
int src_bits,
int dst_bits,
@@ -1108,7 +1108,7 @@ scale_bits(struct gallivm_state *gallivm,
/**
* If RT is a smallfloat (needing denorms) format
*/
-static INLINE int
+static inline int
have_smallfloat_format(struct lp_type dst_type,
enum pipe_format format)
{
@@ -2880,7 +2880,7 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
/**
* Return the blend factor equivalent to a destination alpha of one.
*/
-static INLINE unsigned
+static inline unsigned
force_dst_alpha_one(unsigned factor, boolean clamped_zero)
{
switch(factor) {
diff --git a/src/gallium/drivers/llvmpipe/lp_test.h b/src/gallium/drivers/llvmpipe/lp_test.h
index 4b6c8a7a6a5..e1b51c9c9a6 100644
--- a/src/gallium/drivers/llvmpipe/lp_test.h
+++ b/src/gallium/drivers/llvmpipe/lp_test.h
@@ -77,7 +77,7 @@ unsigned __int64 __rdtsc();
#elif defined(PIPE_CC_GCC) && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64))
-static INLINE uint64_t
+static inline uint64_t
rdtsc(void)
{
uint32_t hi, lo;
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h
index 9fbd3a21648..3d315bb9a73 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.h
+++ b/src/gallium/drivers/llvmpipe/lp_texture.h
@@ -106,21 +106,21 @@ struct llvmpipe_transfer
/** cast wrappers */
-static INLINE struct llvmpipe_resource *
+static inline struct llvmpipe_resource *
llvmpipe_resource(struct pipe_resource *pt)
{
return (struct llvmpipe_resource *) pt;
}
-static INLINE const struct llvmpipe_resource *
+static inline const struct llvmpipe_resource *
llvmpipe_resource_const(const struct pipe_resource *pt)
{
return (const struct llvmpipe_resource *) pt;
}
-static INLINE struct llvmpipe_transfer *
+static inline struct llvmpipe_transfer *
llvmpipe_transfer(struct pipe_transfer *pt)
{
return (struct llvmpipe_transfer *) pt;
@@ -131,7 +131,7 @@ void llvmpipe_init_screen_resource_funcs(struct pipe_screen *screen);
void llvmpipe_init_context_resource_funcs(struct pipe_context *pipe);
-static INLINE boolean
+static inline boolean
llvmpipe_resource_is_texture(const struct pipe_resource *resource)
{
switch (resource->target) {
@@ -153,7 +153,7 @@ llvmpipe_resource_is_texture(const struct pipe_resource *resource)
}
-static INLINE boolean
+static inline boolean
llvmpipe_resource_is_1d(const struct pipe_resource *resource)
{
switch (resource->target) {
@@ -175,7 +175,7 @@ llvmpipe_resource_is_1d(const struct pipe_resource *resource)
}
-static INLINE unsigned
+static inline unsigned
llvmpipe_layer_stride(struct pipe_resource *resource,
unsigned level)
{
@@ -185,7 +185,7 @@ llvmpipe_layer_stride(struct pipe_resource *resource,
}
-static INLINE unsigned
+static inline unsigned
llvmpipe_resource_stride(struct pipe_resource *resource,
unsigned level)
{
diff --git a/src/gallium/drivers/nouveau/Makefile.am b/src/gallium/drivers/nouveau/Makefile.am
index d05f0a17ab4..c52d62e54a2 100644
--- a/src/gallium/drivers/nouveau/Makefile.am
+++ b/src/gallium/drivers/nouveau/Makefile.am
@@ -20,8 +20,6 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
-AUTOMAKE_OPTIONS = subdir-objects
-
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
index ca3c806e92f..cce60550ae5 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
@@ -1153,8 +1153,8 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info)
switch (info->type) {
PROG_TYPE_CASE(VERTEX, VERTEX);
-// PROG_TYPE_CASE(HULL, TESSELLATION_CONTROL);
-// PROG_TYPE_CASE(DOMAIN, TESSELLATION_EVAL);
+ PROG_TYPE_CASE(TESS_CTRL, TESSELLATION_CONTROL);
+ PROG_TYPE_CASE(TESS_EVAL, TESSELLATION_EVAL);
PROG_TYPE_CASE(GEOMETRY, GEOMETRY);
PROG_TYPE_CASE(FRAGMENT, FRAGMENT);
PROG_TYPE_CASE(COMPUTE, COMPUTE);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index 529dcb9bdc2..3ddaeafebbd 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -106,6 +106,7 @@ enum operation
OP_MEMBAR, // memory barrier (mfence, lfence, sfence)
OP_VFETCH, // indirection 0 in attribute space, indirection 1 is vertex base
OP_PFETCH, // fetch base address of vertex src0 (immediate) [+ src1]
+ OP_AFETCH, // fetch base address of shader input (a[%r1+0x10])
OP_EXPORT,
OP_LINTERP,
OP_PINTERP,
@@ -372,7 +373,8 @@ enum SVSemantic
SV_SAMPLE_INDEX,
SV_SAMPLE_POS,
SV_SAMPLE_MASK,
- SV_TESS_FACTOR,
+ SV_TESS_OUTER,
+ SV_TESS_INNER,
SV_TESS_COORD,
SV_TID,
SV_CTAID,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp
index 51b9225156b..fa8ee072a92 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp
@@ -332,6 +332,9 @@ BasicBlock::splitBefore(Instruction *insn, bool attach)
BasicBlock *bb = new BasicBlock(func);
assert(!insn || insn->op != OP_PHI);
+ bb->joinAt = joinAt;
+ joinAt = NULL;
+
splitCommon(insn, bb, attach);
return bb;
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
index 708c5b322ee..19418c0e0f1 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
@@ -428,8 +428,7 @@ BuildUtil::mkSysVal(SVSemantic svName, uint32_t svIndex)
{
Symbol *sym = new_Symbol(prog, FILE_SYSTEM_VALUE, 0);
- assert(svIndex < 4 ||
- (svName == SV_CLIP_DISTANCE || svName == SV_TESS_FACTOR));
+ assert(svIndex < 4 || svName == SV_CLIP_DISTANCE);
switch (svName) {
case SV_POSITION:
@@ -438,7 +437,9 @@ BuildUtil::mkSysVal(SVSemantic svName, uint32_t svIndex)
case SV_POINT_SIZE:
case SV_POINT_COORD:
case SV_CLIP_DISTANCE:
- case SV_TESS_FACTOR:
+ case SV_TESS_OUTER:
+ case SV_TESS_INNER:
+ case SV_TESS_COORD:
sym->reg.type = TYPE_F32;
break;
default:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index dba56bf2716..2b9edcf9172 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -69,18 +69,6 @@ struct nv50_ir_varying
# define NV50_IR_DEBUG_REG_ALLOC 0
#endif
-#define NV50_SEMANTIC_CLIPDISTANCE (TGSI_SEMANTIC_COUNT + 0)
-#define NV50_SEMANTIC_TESSFACTOR (TGSI_SEMANTIC_COUNT + 7)
-#define NV50_SEMANTIC_TESSCOORD (TGSI_SEMANTIC_COUNT + 8)
-#define NV50_SEMANTIC_COUNT (TGSI_SEMANTIC_COUNT + 10)
-
-#define NV50_TESS_PART_FRACT_ODD 0
-#define NV50_TESS_PART_FRACT_EVEN 1
-#define NV50_TESS_PART_POW2 2
-#define NV50_TESS_PART_INTEGER 3
-
-#define NV50_PRIM_PATCHES PIPE_PRIM_MAX
-
struct nv50_ir_prog_symbol
{
uint32_t label;
@@ -151,10 +139,10 @@ struct nv50_ir_prog_info
} gp;
struct {
unsigned numColourResults;
- boolean writesDepth;
- boolean earlyFragTests;
- boolean separateFragData;
- boolean usesDiscard;
+ bool writesDepth;
+ bool earlyFragTests;
+ bool separateFragData;
+ bool usesDiscard;
} fp;
struct {
uint32_t inputOffset; /* base address for user args */
@@ -180,11 +168,11 @@ struct nv50_ir_prog_info
int8_t viewportId; /* output index of ViewportIndex */
uint8_t fragDepth; /* output index of FragDepth */
uint8_t sampleMask; /* output index of SampleMask */
- boolean sampleInterp; /* perform sample interp on all fp inputs */
+ bool sampleInterp; /* perform sample interp on all fp inputs */
uint8_t backFaceColor[2]; /* input/output indices of back face colour */
uint8_t globalAccess; /* 1 for read, 2 for wr, 3 for rw */
- boolean fp64; /* program uses fp64 math */
- boolean nv50styleSurfaces; /* generate gX[] access for raw buffers */
+ bool fp64; /* program uses fp64 math */
+ bool nv50styleSurfaces; /* generate gX[] access for raw buffers */
uint8_t resInfoCBSlot; /* cX[] used for tex handles, surface info */
uint16_t texBindBase; /* base address for tex handles (nve4) */
uint16_t suInfoBase; /* base address for surface info (nve4) */
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index ab8bf2e5504..f06056f8f17 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -77,6 +77,7 @@ private:
void emitMOV(const Instruction *);
void emitINTERP(const Instruction *);
+ void emitAFETCH(const Instruction *);
void emitPFETCH(const Instruction *);
void emitVFETCH(const Instruction *);
void emitEXPORT(const Instruction *);
@@ -120,6 +121,8 @@ private:
void emitPIXLD(const Instruction *);
+ void emitBAR(const Instruction *);
+
void emitFlow(const Instruction *);
inline void defId(const ValueDef&, const int pos);
@@ -1250,6 +1253,13 @@ CodeEmitterGK110::emitPIXLD(const Instruction *i)
}
void
+CodeEmitterGK110::emitBAR(const Instruction *i)
+{
+ /* TODO */
+ emitNOP(i);
+}
+
+void
CodeEmitterGK110::emitFlow(const Instruction *i)
{
const FlowInstruction *f = i->asFlow();
@@ -1330,6 +1340,23 @@ CodeEmitterGK110::emitFlow(const Instruction *i)
}
void
+CodeEmitterGK110::emitAFETCH(const Instruction *i)
+{
+ uint32_t offset = i->src(0).get()->reg.data.offset & 0x7ff;
+
+ code[0] = 0x00000002 | (offset << 23);
+ code[1] = 0x7d000000 | (offset >> 9);
+
+ if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
+ code[1] |= 0x8;
+
+ emitPredicate(i);
+
+ defId(i->def(0), 2);
+ srcId(i->src(0).getIndirect(0), 10);
+}
+
+void
CodeEmitterGK110::emitPFETCH(const Instruction *i)
{
uint32_t prim = i->src(0).get()->reg.data.u32;
@@ -1698,6 +1725,9 @@ CodeEmitterGK110::emitInstruction(Instruction *insn)
case OP_EXPORT:
emitEXPORT(insn);
break;
+ case OP_AFETCH:
+ emitAFETCH(insn);
+ break;
case OP_PFETCH:
emitPFETCH(insn);
break;
@@ -1856,6 +1886,9 @@ CodeEmitterGK110::emitInstruction(Instruction *insn)
emitNOP(insn);
insn->join = 1;
break;
+ case OP_BAR:
+ emitBAR(insn);
+ break;
case OP_PHI:
case OP_UNION:
case OP_CONSTRAINT:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index 399a6f1db13..ef5c87d0437 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -174,6 +174,7 @@ private:
void emitALD();
void emitAST();
void emitISBERD();
+ void emitAL2P();
void emitIPA();
void emitPIXLD();
@@ -2204,6 +2205,17 @@ CodeEmitterGM107::emitISBERD()
}
void
+CodeEmitterGM107::emitAL2P()
+{
+ emitInsn (0xefa00000);
+ emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
+ emitO (0x20);
+ emitField(0x14, 11, insn->src(0).get()->reg.data.offset);
+ emitGPR (0x08, insn->src(0).getIndirect(0));
+ emitGPR (0x00, insn->def(0));
+}
+
+void
CodeEmitterGM107::emitIPA()
{
int ipam = 0, ipas = 0;
@@ -2441,8 +2453,14 @@ CodeEmitterGM107::emitTXQ()
break;
}
- emitInsn (0xdf4a0000);
- emitField(0x24, 13, insn->tex.r);
+ if (insn->tex.rIndirectSrc >= 0) {
+ emitInsn (0xdf500000);
+ } else {
+ emitInsn (0xdf480000);
+ emitField(0x24, 13, insn->tex.r);
+ }
+
+ emitField(0x31, 1, insn->tex.liveOnly);
emitField(0x1f, 4, insn->tex.mask);
emitField(0x16, 6, type);
emitGPR (0x08, insn->src(0));
@@ -2753,6 +2771,9 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
case OP_PFETCH:
emitISBERD();
break;
+ case OP_AFETCH:
+ emitAL2P();
+ break;
case OP_LINTERP:
case OP_PINTERP:
emitIPA();
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
index 1bfc8e32e84..67ea6df773c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
@@ -499,10 +499,14 @@ CodeEmitterNV50::emitForm_MAD(const Instruction *i)
setSrc(i, 2, 2);
if (i->getIndirect(0, 0)) {
- assert(!i->getIndirect(1, 0));
+ assert(!i->srcExists(1) || !i->getIndirect(1, 0));
+ assert(!i->srcExists(2) || !i->getIndirect(2, 0));
setAReg16(i, 0);
- } else {
+ } else if (i->srcExists(1) && i->getIndirect(1, 0)) {
+ assert(!i->srcExists(2) || !i->getIndirect(2, 0));
setAReg16(i, 1);
+ } else {
+ setAReg16(i, 2);
}
}
@@ -546,7 +550,7 @@ CodeEmitterNV50::emitForm_MUL(const Instruction *i)
}
// usual immediate form
-// - 1 to 3 sources where last is immediate (rir, gir)
+// - 1 to 3 sources where second is immediate (rir, gir)
// - no address or predicate possible
void
CodeEmitterNV50::emitForm_IMM(const Instruction *i)
@@ -562,7 +566,7 @@ CodeEmitterNV50::emitForm_IMM(const Instruction *i)
if (Target::operationSrcNr[i->op] > 1) {
setSrc(i, 0, 0);
setImmediate(i, 1);
- setSrc(i, 2, 1);
+ // If there is another source, it has to be the same as the dest reg.
} else {
setImmediate(i, 0);
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index 472e3a84119..f607f3ba3ec 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -85,6 +85,7 @@ private:
void emitCCTL(const Instruction *);
void emitINTERP(const Instruction *);
+ void emitAFETCH(const Instruction *);
void emitPFETCH(const Instruction *);
void emitVFETCH(const Instruction *);
void emitEXPORT(const Instruction *);
@@ -1450,6 +1451,7 @@ CodeEmitterNVC0::emitBAR(const Instruction *i)
ImmediateValue *imm = i->getSrc(0)->asImm();
assert(imm);
code[0] |= imm->reg.data.u32 << 20;
+ code[1] |= 0x8000;
}
// thread count
@@ -1460,6 +1462,7 @@ CodeEmitterNVC0::emitBAR(const Instruction *i)
assert(imm);
code[0] |= imm->reg.data.u32 << 26;
code[1] |= imm->reg.data.u32 >> 6;
+ code[1] |= 0x4000;
}
if (i->srcExists(2) && (i->predSrc != 2)) {
@@ -1494,6 +1497,21 @@ CodeEmitterNVC0::emitBAR(const Instruction *i)
}
void
+CodeEmitterNVC0::emitAFETCH(const Instruction *i)
+{
+ code[0] = 0x00000006;
+ code[1] = 0x0c000000 | (i->src(0).get()->reg.data.offset & 0x7ff);
+
+ if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
+ code[0] |= 0x200;
+
+ emitPredicate(i);
+
+ defId(i->def(0), 14);
+ srcId(i->src(0).getIndirect(0), 20);
+}
+
+void
CodeEmitterNVC0::emitPFETCH(const Instruction *i)
{
uint32_t prim = i->src(0).get()->reg.data.u32;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index ecd115f9807..4847a0f3355 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -372,6 +372,10 @@ static nv50_ir::SVSemantic translateSysVal(uint sysval)
case TGSI_SEMANTIC_SAMPLEPOS: return nv50_ir::SV_SAMPLE_POS;
case TGSI_SEMANTIC_SAMPLEMASK: return nv50_ir::SV_SAMPLE_MASK;
case TGSI_SEMANTIC_INVOCATIONID: return nv50_ir::SV_INVOCATION_ID;
+ case TGSI_SEMANTIC_TESSCOORD: return nv50_ir::SV_TESS_COORD;
+ case TGSI_SEMANTIC_TESSOUTER: return nv50_ir::SV_TESS_OUTER;
+ case TGSI_SEMANTIC_TESSINNER: return nv50_ir::SV_TESS_INNER;
+ case TGSI_SEMANTIC_VERTICESIN: return nv50_ir::SV_VERTEX_COUNT;
default:
assert(0);
return nv50_ir::SV_CLOCK;
@@ -434,7 +438,6 @@ nv50_ir::DataType Instruction::inferSrcType() const
case TGSI_OPCODE_USLT:
case TGSI_OPCODE_USNE:
case TGSI_OPCODE_USHR:
- case TGSI_OPCODE_UCMP:
case TGSI_OPCODE_ATOMUADD:
case TGSI_OPCODE_ATOMXCHG:
case TGSI_OPCODE_ATOMCAS:
@@ -827,7 +830,7 @@ Source::Source(struct nv50_ir_prog_info *prog) : info(prog)
if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
tgsi_dump(tokens, 0);
- mainTempsInLMem = FALSE;
+ mainTempsInLMem = false;
}
Source::~Source()
@@ -938,7 +941,7 @@ void Source::scanProperty(const struct tgsi_full_property *prop)
info->prop.gp.instanceCount = prop->u[0].Data;
break;
case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
- info->prop.fp.separateFragData = TRUE;
+ info->prop.fp.separateFragData = true;
break;
case TGSI_PROPERTY_FS_COORD_ORIGIN:
case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
@@ -947,6 +950,24 @@ void Source::scanProperty(const struct tgsi_full_property *prop)
case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
info->io.genUserClip = -1;
break;
+ case TGSI_PROPERTY_TCS_VERTICES_OUT:
+ info->prop.tp.outputPatchSize = prop->u[0].Data;
+ break;
+ case TGSI_PROPERTY_TES_PRIM_MODE:
+ info->prop.tp.domain = prop->u[0].Data;
+ break;
+ case TGSI_PROPERTY_TES_SPACING:
+ info->prop.tp.partitioning = prop->u[0].Data;
+ break;
+ case TGSI_PROPERTY_TES_VERTEX_ORDER_CW:
+ info->prop.tp.winding = prop->u[0].Data;
+ break;
+ case TGSI_PROPERTY_TES_POINT_MODE:
+ if (prop->u[0].Data)
+ info->prop.tp.outputPrim = PIPE_PRIM_POINTS;
+ else
+ info->prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; /* anything but points */
+ break;
default:
INFO("unhandled TGSI property %d\n", prop->Property.PropertyName);
break;
@@ -1035,6 +1056,11 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
if (decl->Interp.Location || info->io.sampleInterp)
info->in[i].centroid = 1;
}
+
+ if (sn == TGSI_SEMANTIC_PATCH) {
+ info->in[i].patch = 1;
+ info->numPatchConstants = MAX2(info->numPatchConstants, si + 1);
+ }
}
}
break;
@@ -1069,6 +1095,13 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
case TGSI_SEMANTIC_VIEWPORT_INDEX:
info->io.viewportId = i;
break;
+ case TGSI_SEMANTIC_PATCH:
+ info->numPatchConstants = MAX2(info->numPatchConstants, si + 1);
+ /* fallthrough */
+ case TGSI_SEMANTIC_TESSOUTER:
+ case TGSI_SEMANTIC_TESSINNER:
+ info->out[i].patch = 1;
+ break;
default:
break;
}
@@ -1092,6 +1125,13 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
info->sv[i].sn = sn;
info->sv[i].si = si;
info->sv[i].input = inferSysValDirection(sn);
+
+ switch (sn) {
+ case TGSI_SEMANTIC_TESSOUTER:
+ case TGSI_SEMANTIC_TESSINNER:
+ info->sv[i].patch = 1;
+ break;
+ }
}
break;
case TGSI_FILE_RESOURCE:
@@ -1156,7 +1196,7 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
} else
if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) {
if (insn.getDst(0).isIndirect(0))
- mainTempsInLMem = TRUE;
+ mainTempsInLMem = true;
}
}
@@ -1164,12 +1204,22 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
Instruction::SrcRegister src = insn.getSrc(s);
if (src.getFile() == TGSI_FILE_TEMPORARY) {
if (src.isIndirect(0))
- mainTempsInLMem = TRUE;
+ mainTempsInLMem = true;
} else
if (src.getFile() == TGSI_FILE_RESOURCE) {
if (src.getIndex(0) == TGSI_RESOURCE_GLOBAL)
info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
0x1 : 0x2;
+ } else
+ if (src.getFile() == TGSI_FILE_OUTPUT) {
+ if (src.isIndirect(0)) {
+ // We don't know which output is accessed, so just mark everything
+ // for reading. This is an extremely unlikely occurrence.
+ for (unsigned i = 0; i < info->numOutputs; ++i)
+ info->out[i].oread = 1;
+ } else {
+ info->out[src.getIndex(0)].oread = 1;
+ }
}
if (src.getFile() != TGSI_FILE_INPUT)
continue;
@@ -1246,6 +1296,7 @@ private:
Value *shiftAddress(Value *);
Value *getVertexBase(int s);
+ Value *getOutputBase(int s);
DataArray *getArrayForFile(unsigned file, int idx);
Value *fetchSrc(int s, int c);
Value *acquireDst(int d, int c);
@@ -1343,6 +1394,8 @@ private:
Value *vtxBase[5]; // base address of vertex in primitive (for TP/GP)
uint8_t vtxBaseValid;
+ Value *outBase; // base address of vertex out patch (for TCP)
+
Stack condBBs; // fork BB, then else clause BB
Stack joinBBs; // fork BB, for inserting join ops on ENDIF
Stack loopBBs; // loop headers
@@ -1476,6 +1529,22 @@ Converter::getVertexBase(int s)
}
Value *
+Converter::getOutputBase(int s)
+{
+ assert(s < 5);
+ if (!(vtxBaseValid & (1 << s))) {
+ Value *offset = loadImm(NULL, tgsi.getSrc(s).getIndex(1));
+ if (tgsi.getSrc(s).isIndirect(1))
+ offset = mkOp2v(OP_ADD, TYPE_U32, getSSA(),
+ fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL),
+ offset);
+ vtxBaseValid |= 1 << s;
+ vtxBase[s] = mkOp2v(OP_ADD, TYPE_U32, getSSA(), outBase, offset);
+ }
+ return vtxBase[s];
+}
+
+Value *
Converter::fetchSrc(int s, int c)
{
Value *res;
@@ -1488,6 +1557,9 @@ Converter::fetchSrc(int s, int c)
if (src.is2D()) {
switch (src.getFile()) {
+ case TGSI_FILE_OUTPUT:
+ dimRel = getOutputBase(s);
+ break;
case TGSI_FILE_INPUT:
dimRel = getVertexBase(s);
break;
@@ -1542,6 +1614,7 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
const int idx2d = src.is2D() ? src.getIndex(1) : 0;
const int idx = src.getIndex(0);
const int swz = src.getSwizzle(c);
+ Instruction *ld;
switch (src.getFile()) {
case TGSI_FILE_IMMEDIATE:
@@ -1569,13 +1642,19 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
if (ptr)
return mkLoadv(TYPE_U32, srcToSym(src, c), ptr);
}
- return mkLoadv(TYPE_U32, srcToSym(src, c), shiftAddress(ptr));
+ ld = mkLoad(TYPE_U32, getSSA(), srcToSym(src, c), shiftAddress(ptr));
+ ld->perPatch = info->in[idx].patch;
+ return ld->getDef(0);
case TGSI_FILE_OUTPUT:
- assert(!"load from output file");
- return NULL;
+ assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL);
+ ld = mkLoad(TYPE_U32, getSSA(), srcToSym(src, c), shiftAddress(ptr));
+ ld->perPatch = info->out[idx].patch;
+ return ld->getDef(0);
case TGSI_FILE_SYSTEM_VALUE:
assert(!ptr);
- return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
+ ld = mkOp1(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
+ ld->perPatch = info->sv[idx].patch;
+ return ld->getDef(0);
default:
return getArrayForFile(src.getFile(), idx2d)->load(
sub.cur->values, idx, swz, shiftAddress(ptr));
@@ -1645,7 +1724,8 @@ Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c,
viewport != NULL)
mkOp1(OP_MOV, TYPE_U32, viewport, val);
else
- mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val);
+ mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val)->perPatch =
+ info->out[idx].patch;
}
} else
if (f == TGSI_FILE_TEMPORARY ||
@@ -1687,6 +1767,7 @@ Converter::insertConvergenceOps(BasicBlock *conv, BasicBlock *fork)
join->fixed = 1;
conv->insertHead(join);
+ assert(!fork->joinAt);
fork->joinAt = new_FlowInstruction(func, OP_JOINAT, conv);
fork->insertBefore(fork->getExit(), fork->joinAt);
}
@@ -1728,7 +1809,7 @@ Converter::handleTXQ(Value *dst0[4], enum TexQuery query)
}
tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level
- setTexRS(tex, c, 1, -1);
+ setTexRS(tex, ++c, 1, -1);
bb->insertTail(tex);
}
@@ -2569,6 +2650,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
}
break;
case TGSI_OPCODE_UCMP:
+ srcTy = TYPE_U32;
+ /* fallthrough */
case TGSI_OPCODE_CMP:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
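For context on the two UCMP changes (dropping it from inferSrcType() earlier in this file and pinning srcTy here): TGSI's UCMP selects per component based on an unsigned integer comparison of its first source against zero, independent of the destination type, so the source type must be forced to TYPE_U32 rather than inferred. A sketch of the per-component semantics:

    #include <cstdint>

    // TGSI UCMP, per 32-bit component: dst = (src0 != 0u) ? src1 : src2.
    static uint32_t ucmp(uint32_t s0, uint32_t s1, uint32_t s2)
    {
       return s0 ? s1 : s2;
    }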
@@ -3282,10 +3365,21 @@ Converter::run()
clipVtx[c] = getScratch();
}
- if (prog->getType() == Program::TYPE_FRAGMENT) {
+ switch (prog->getType()) {
+ case Program::TYPE_TESSELLATION_CONTROL:
+ outBase = mkOp2v(
+ OP_SUB, TYPE_U32, getSSA(),
+ mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
+ mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
+ break;
+ case Program::TYPE_FRAGMENT: {
Symbol *sv = mkSysVal(SV_POSITION, 3);
fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
+ break;
+ }
+ default:
+ break;
}
if (info->io.viewportId >= 0)
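The TESSELLATION_CONTROL case added above derives the patch's output base as SV_LANEID minus SV_INVOCATION_ID. The working assumption, matching how this code uses the value, is that the invocations of one patch occupy consecutive lanes of a warp, so the subtraction recovers the lane of invocation 0, and per-vertex output addresses are then offsets from that base (see getOutputBase() earlier in this file). A conceptual sketch:

    #include <cstdint>

    // Assumption (implied by the hunk above): a patch's TCS invocations
    // sit in consecutive lanes, so lane - invocation is invocation 0's
    // lane, i.e. the base used for o[] addressing.
    static uint32_t tcs_output_base(uint32_t laneid, uint32_t invocation)
    {
       return laneid - invocation;
    }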
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
index 596ac95d489..1f3fce2bb9a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
@@ -176,7 +176,7 @@ GM107LoweringPass::handlePOPCNT(Instruction *i)
i->getSrc(0), i->getSrc(1));
i->setSrc(0, tmp);
i->setSrc(1, NULL);
- return TRUE;
+ return true;
}
//
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
index 2c7f7e326b2..bea293bac99 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
@@ -871,6 +871,7 @@ NV50LoweringPreSSA::handleTXL(TexInstruction *i)
BasicBlock *joinBB = i->bb->splitAfter(i);
bld.setPosition(currBB, true);
+ assert(!currBB->joinAt);
currBB->joinAt = bld.mkFlow(OP_JOINAT, joinBB, CC_ALWAYS, NULL);
for (int l = 0; l <= 3; ++l) {
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 7a5d1ce0299..c3c302da5c8 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -559,6 +559,12 @@ NVC0LegalizePostRA::visit(BasicBlock *bb)
} else
if (i->isNop()) {
bb->remove(i);
+ } else
+ if (i->op == OP_BAR && i->subOp == NV50_IR_SUBOP_BAR_SYNC &&
+ prog->getType() != Program::TYPE_COMPUTE) {
+ // It seems that barriers are never required for tessellation, since
+ // the warp size is 32 and there are always at most 32 TCS threads.
+ bb->remove(i);
} else {
// TODO: Move this to before register allocation for operations that
// need the $c register !
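The barrier elimination above leans on the lockstep property spelled out in its comment: with a 32-wide warp and at most 32 TCS invocations, a patch never spans warps, so BAR.SYNC can never be observed outside compute shaders. A small sketch of that condition (a sketch of the reasoning, not driver code):

    // A barrier is only meaningful when invocations can span warps.
    static bool barrier_observable(unsigned invocations, unsigned warp_size)
    {
       return invocations > warp_size;   /* false for TCS: <= 32 vs. 32 */
    }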
@@ -956,7 +962,43 @@ NVC0LoweringPass::handleTXD(TexInstruction *txd)
bool
NVC0LoweringPass::handleTXQ(TexInstruction *txq)
{
- // TODO: indirect resource/sampler index
+ if (txq->tex.rIndirectSrc < 0)
+ return true;
+
+ Value *ticRel = txq->getIndirectR();
+ const int chipset = prog->getTarget()->getChipset();
+
+ txq->setIndirectS(NULL);
+ txq->tex.sIndirectSrc = -1;
+
+ assert(ticRel);
+
+ if (chipset < NVISA_GK104_CHIPSET) {
+ LValue *src = new_LValue(func, FILE_GPR); // 0xttxsaaaa
+
+ txq->setSrc(txq->tex.rIndirectSrc, NULL);
+ if (txq->tex.r)
+ ticRel = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(),
+ ticRel, bld.mkImm(txq->tex.r));
+
+ bld.mkOp2(OP_SHL, TYPE_U32, src, ticRel, bld.mkImm(0x17));
+
+ txq->moveSources(0, 1);
+ txq->setSrc(0, src);
+ } else {
+ Value *hnd = loadTexHandle(
+ bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
+ txq->getIndirectR(), bld.mkImm(2)),
+ txq->tex.r);
+ txq->tex.r = 0xff;
+ txq->tex.s = 0x1f;
+
+ txq->setIndirectR(NULL);
+ txq->moveSources(0, 1);
+ txq->setSrc(0, hnd);
+ txq->tex.rIndirectSrc = 0;
+ }
+
return true;
}
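The new handleTXQ() lowering mirrors the other indirect texture paths. Pre-GK104, the texture unit reads the TIC index from the top bits of the first source (the 0xttxsaaaa layout named in the comment), hence the add of tex.r followed by the shift of 0x17; on GK104+ the index is instead turned into a texture handle loaded from the driver constbuf via loadTexHandle(), with r/s set to the sentinel values shown. A sketch of just the pre-GK104 packing (field position taken from the shift in the hunk):

    #include <cstdint>

    // Combine a relative TIC index with the static one and place it in
    // the top bits of the 0xttxsaaaa source word, as the Fermi path does.
    static uint32_t pack_tic_source(uint32_t tic_rel, uint32_t tic_static)
    {
       return (tic_rel + tic_static) << 0x17;
    }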
@@ -1485,6 +1527,10 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
i->op = OP_MOV;
i->setSrc(0, bld.mkImm((sv == SV_NTID || sv == SV_NCTAID) ? 1 : 0));
}
+ if (sv == SV_VERTEX_COUNT) {
+ bld.setPosition(i, true);
+ bld.mkOp2(OP_EXTBF, TYPE_U32, i->getDef(0), i->getDef(0), bld.mkImm(0x808));
+ }
return true;
}
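The SV_VERTEX_COUNT fixup above extracts a bitfield after the raw read. Assuming the usual nv50-ir EXTBF descriptor of (width << 8) | offset, the immediate 0x808 pulls an 8-bit field starting at bit 8 out of the raw register; the descriptor encoding is an assumption here, only the immediate itself comes from the hunk. A reference model:

    #include <cstdint>

    // EXTBF model under the assumed (width << 8) | offset descriptor:
    // 0x808 extracts bits 8..15 (widths < 32 assumed).
    static uint32_t extbf(uint32_t v, uint32_t desc)
    {
       unsigned offset = desc & 0xff;
       unsigned width  = (desc >> 8) & 0xff;
       return (v >> offset) & ((1u << width) - 1);
    }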
@@ -1554,7 +1600,7 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
ld->subOp = NV50_IR_SUBOP_PIXLD_COVMASK;
break;
default:
- if (prog->getType() == Program::TYPE_TESSELLATION_EVAL)
+ if (prog->getType() == Program::TYPE_TESSELLATION_EVAL && !i->perPatch)
vtx = bld.mkOp1v(OP_PFETCH, TYPE_U32, bld.getSSA(), bld.mkImm(0));
ld = bld.mkFetch(i->getDef(0), i->dType,
FILE_SHADER_INPUT, addr, i->getIndirect(0, 0), vtx);
@@ -1705,6 +1751,7 @@ NVC0LoweringPass::checkPredicate(Instruction *insn)
bool
NVC0LoweringPass::visit(Instruction *i)
{
+ bool ret = true;
bld.setPosition(i, false);
if (i->cc != CC_ALWAYS)
@@ -1736,7 +1783,8 @@ NVC0LoweringPass::visit(Instruction *i)
case OP_SQRT:
return handleSQRT(i);
case OP_EXPORT:
- return handleEXPORT(i);
+ ret = handleEXPORT(i);
+ break;
case OP_EMIT:
case OP_RESTART:
return handleOUT(i);
@@ -1775,6 +1823,9 @@ NVC0LoweringPass::visit(Instruction *i)
i->setIndirect(0, 0, ptr);
i->subOp = NV50_IR_SUBOP_LDC_IS;
}
+ } else if (i->src(0).getFile() == FILE_SHADER_OUTPUT) {
+ assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL);
+ i->op = OP_VFETCH;
}
break;
case OP_ATOM:
@@ -1796,7 +1847,20 @@ NVC0LoweringPass::visit(Instruction *i)
default:
break;
}
- return true;
+
+ /* Kepler+ has a special opcode to compute a new base address to be used
+ * for indirect loads.
+ */
+ if (targ->getChipset() >= NVISA_GK104_CHIPSET && !i->perPatch &&
+ (i->op == OP_VFETCH || i->op == OP_EXPORT) && i->src(0).isIndirect(0)) {
+ Instruction *afetch = bld.mkOp1(OP_AFETCH, TYPE_U32, bld.getSSA(),
+ cloneShallow(func, i->getSrc(0)));
+ afetch->setIndirect(0, 0, i->getIndirect(0, 0));
+ i->src(0).get()->reg.data.offset = 0;
+ i->setIndirect(0, 0, afetch->getDef(0));
+ }
+
+ return ret;
}
bool
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index ae739eeda83..cea96dcdfc5 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -608,9 +608,12 @@ ConstantFolding::expr(Instruction *i,
case OP_FMA: {
i->op = OP_ADD;
+ /* Move the immediate to the second arg; otherwise the ADD operation
+ * won't be emittable.
+ */
i->setSrc(1, i->getSrc(0));
- i->src(1).mod = i->src(2).mod;
i->setSrc(0, i->getSrc(2));
+ i->src(0).mod = i->src(2).mod;
i->setSrc(2, NULL);
ImmediateValue src0;
@@ -2082,6 +2085,8 @@ MemoryOpt::runOpt(BasicBlock *bb)
}
if (ldst->getPredicate()) // TODO: handle predicated ld/st
continue;
+ if (ldst->perPatch) // TODO: create separate per-patch lists
+ continue;
if (isLoad) {
DataFile file = ldst->src(0).getFile();
@@ -2515,6 +2520,8 @@ Instruction::isResultEqual(const Instruction *that) const
case FILE_MEMORY_CONST:
case FILE_SHADER_INPUT:
return true;
+ case FILE_SHADER_OUTPUT:
+ return bb->getProgram()->getType() == Program::TYPE_TESSELLATION_EVAL;
default:
return false;
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index ef3de6ff92a..9ebdc6586db 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -135,6 +135,7 @@ const char *operationStr[OP_LAST + 1] =
"membar",
"vfetch",
"pfetch",
+ "afetch",
"export",
"linterp",
"pinterp",
@@ -258,7 +259,8 @@ static const char *SemanticStr[SV_LAST + 1] =
"SAMPLE_INDEX",
"SAMPLE_POS",
"SAMPLE_MASK",
- "TESS_FACTOR",
+ "TESS_OUTER",
+ "TESS_INNER",
"TESS_COORD",
"TID",
"CTAID",
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index 898653c9953..78bc97f4397 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -2066,6 +2066,8 @@ RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb)
condenseDefs(i);
if (i->src(0).isIndirect(0) && typeSizeof(i->dType) >= 8)
addHazard(i, i->src(0).getIndirect(0));
+ if (i->src(0).isIndirect(1) && typeSizeof(i->dType) >= 8)
+ addHazard(i, i->src(0).getIndirect(1));
} else
if (i->op == OP_UNION ||
i->op == OP_MERGE ||
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
index 7992f539782..fe530c76b62 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
@@ -41,7 +41,7 @@ const uint8_t Target::operationSrcNr[] =
0, 0, 0, 0, 0, // BRA, CALL, RET, CONT, BREAK,
0, 0, 0, // PRERET,CONT,BREAK
0, 0, 0, 0, 0, 0, // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR
- 1, 1, 2, 1, 2, // VFETCH, PFETCH, EXPORT, LINTERP, PINTERP
+ 1, 1, 1, 2, 1, 2, // VFETCH, PFETCH, AFETCH, EXPORT, LINTERP, PINTERP
1, 1, // EMIT, RESTART
1, 1, 1, // TEX, TXB, TXL,
1, 1, 1, 1, 1, 1, 2, // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP
@@ -96,8 +96,8 @@ const OpClass Target::operationClass[] =
OPCLASS_FLOW, OPCLASS_FLOW,
// MEMBAR
OPCLASS_CONTROL,
- // VFETCH, PFETCH, EXPORT
- OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_STORE,
+ // VFETCH, PFETCH, AFETCH, EXPORT
+ OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_STORE,
// LINTERP, PINTERP
OPCLASS_SFU, OPCLASS_SFU,
// EMIT, RESTART
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
index ca545a6024a..f3ddcaa5199 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
@@ -118,7 +118,7 @@ void TargetNV50::initOpInfo()
static const uint32_t shortForm[(OP_LAST + 31) / 32] =
{
// MOV,ADD,SUB,MUL,MAD,SAD,L/PINTERP,RCP,TEX,TXF
- 0x00014e40, 0x00000040, 0x00000498, 0x00000000
+ 0x00014e40, 0x00000040, 0x00000930, 0x00000000
};
static const operation noDestList[] =
{
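The shortForm mask change above is mechanical fallout from inserting OP_AFETCH into the operation enum: every opcode after the insertion point moves up one slot, so the bits for the affected ops in this word of the mask shift left by one (0x498 and 0x930 are the same bit pattern, one position apart). A one-line check:

    #include <cassert>

    int main()
    {
       assert((0x498u << 1) == 0x930u);  /* same ops, shifted one slot */
       return 0;
    }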
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
index 7d4a859dde4..27df0eba66b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
@@ -286,7 +286,8 @@ TargetNVC0::getSVAddress(DataFile shaderFile, const Symbol *sym) const
case SV_CLIP_DISTANCE: return 0x2c0 + idx * 4;
case SV_POINT_COORD: return 0x2e0 + idx * 4;
case SV_FACE: return 0x3fc;
- case SV_TESS_FACTOR: return 0x000 + idx * 4;
+ case SV_TESS_OUTER: return 0x000 + idx * 4;
+ case SV_TESS_INNER: return 0x010 + idx * 4;
case SV_TESS_COORD: return 0x2f0 + idx * 4;
case SV_NTID: return kepler ? (0x00 + idx * 4) : ~0;
case SV_NCTAID: return kepler ? (0x0c + idx * 4) : ~0;
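Splitting SV_TESS_FACTOR into SV_TESS_OUTER and SV_TESS_INNER gives each its own slot in the driver's shader-input area: outer factors at 0x000 and inner factors at 0x010, four bytes per component (four outer and two inner factors in standard tessellation). A sketch of the resulting address map:

    #include <cstdint>

    // Address layout from the hunk above: outer factors at 0x000..0x00c,
    // inner factors at 0x010/0x014.
    static uint32_t tess_factor_addr(bool outer, unsigned idx)
    {
       return (outer ? 0x000 : 0x010) + idx * 4;
    }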
diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c
index 09cdbb53ecb..67e181e803a 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -22,13 +22,13 @@ struct nouveau_transfer {
uint32_t offset;
};
-static INLINE struct nouveau_transfer *
+static inline struct nouveau_transfer *
nouveau_transfer(struct pipe_transfer *transfer)
{
return (struct nouveau_transfer *)transfer;
}
-static INLINE boolean
+static inline bool
nouveau_buffer_malloc(struct nv04_resource *buf)
{
if (!buf->data)
@@ -36,16 +36,11 @@ nouveau_buffer_malloc(struct nv04_resource *buf)
return !!buf->data;
}
-static INLINE boolean
+static inline bool
nouveau_buffer_allocate(struct nouveau_screen *screen,
struct nv04_resource *buf, unsigned domain)
{
- uint32_t size = buf->base.width0;
-
- if (buf->base.bind & (PIPE_BIND_CONSTANT_BUFFER |
- PIPE_BIND_COMPUTE_RESOURCE |
- PIPE_BIND_SHADER_RESOURCE))
- size = align(size, 0x100);
+ uint32_t size = align(buf->base.width0, 0x100);
if (domain == NOUVEAU_BO_VRAM) {
buf->mm = nouveau_mm_allocate(screen->mm_VRAM, size,
@@ -58,12 +53,12 @@ nouveau_buffer_allocate(struct nouveau_screen *screen,
buf->mm = nouveau_mm_allocate(screen->mm_GART, size,
&buf->bo, &buf->offset);
if (!buf->bo)
- return FALSE;
+ return false;
NOUVEAU_DRV_STAT(screen, buf_obj_current_bytes_sys, buf->base.width0);
} else {
assert(domain == 0);
if (!nouveau_buffer_malloc(buf))
- return FALSE;
+ return false;
}
buf->domain = domain;
if (buf->bo)
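The allocation change above drops the per-bind-flag special case and rounds every buffer up to 0x100 bytes, the strictest alignment previously applied (the constant-buffer case); presumably this is because a buffer can later be bound for uses that were not declared at creation time. The align() helper used there is the usual power-of-two round-up:

    #include <cstdint>

    // Round v up to a multiple of the power-of-two a, as align(v, 0x100)
    // does in the hunk above.
    static uint32_t align_up(uint32_t v, uint32_t a)
    {
       return (v + (a - 1)) & ~(a - 1);
    }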
@@ -71,10 +66,10 @@ nouveau_buffer_allocate(struct nouveau_screen *screen,
util_range_set_empty(&buf->valid_buffer_range);
- return TRUE;
+ return true;
}
-static INLINE void
+static inline void
release_allocation(struct nouveau_mm_allocation **mm,
struct nouveau_fence *fence)
{
@@ -82,7 +77,7 @@ release_allocation(struct nouveau_mm_allocation **mm,
(*mm) = NULL;
}
-INLINE void
+inline void
nouveau_buffer_release_gpu_storage(struct nv04_resource *buf)
{
nouveau_bo_ref(NULL, &buf->bo);
@@ -98,7 +93,7 @@ nouveau_buffer_release_gpu_storage(struct nv04_resource *buf)
buf->domain = 0;
}
-static INLINE boolean
+static inline bool
nouveau_buffer_reallocate(struct nouveau_screen *screen,
struct nv04_resource *buf, unsigned domain)
{
@@ -139,13 +134,13 @@ nouveau_buffer_destroy(struct pipe_screen *pscreen,
*/
static uint8_t *
nouveau_transfer_staging(struct nouveau_context *nv,
- struct nouveau_transfer *tx, boolean permit_pb)
+ struct nouveau_transfer *tx, bool permit_pb)
{
const unsigned adj = tx->base.box.x & NOUVEAU_MIN_BUFFER_MAP_ALIGN_MASK;
const unsigned size = align(tx->base.box.width, 4) + adj;
if (!nv->push_data)
- permit_pb = FALSE;
+ permit_pb = false;
if ((size <= NOUVEAU_TRANSFER_PUSHBUF_THRESHOLD) && permit_pb) {
tx->map = align_malloc(size, NOUVEAU_MIN_BUFFER_MAP_ALIGN);
@@ -167,7 +162,7 @@ nouveau_transfer_staging(struct nouveau_context *nv,
* buffer. Also updates buf->data if present.
*
* Maybe just migrate to GART right away if we actually need to do this. */
-static boolean
+static bool
nouveau_transfer_read(struct nouveau_context *nv, struct nouveau_transfer *tx)
{
struct nv04_resource *buf = nv04_resource(tx->base.resource);
@@ -180,12 +175,12 @@ nouveau_transfer_read(struct nouveau_context *nv, struct nouveau_transfer *tx)
buf->bo, buf->offset + base, buf->domain, size);
if (nouveau_bo_wait(tx->bo, NOUVEAU_BO_RD, nv->client))
- return FALSE;
+ return false;
if (buf->data)
memcpy(buf->data + base, tx->map, size);
- return TRUE;
+ return true;
}
static void
@@ -195,7 +190,7 @@ nouveau_transfer_write(struct nouveau_context *nv, struct nouveau_transfer *tx,
struct nv04_resource *buf = nv04_resource(tx->base.resource);
uint8_t *data = tx->map + offset;
const unsigned base = tx->base.box.x + offset;
- const boolean can_cb = !((base | size) & 3);
+ const bool can_cb = !((base | size) & 3);
if (buf->data)
memcpy(data, buf->data + base, size);
@@ -224,32 +219,32 @@ nouveau_transfer_write(struct nouveau_context *nv, struct nouveau_transfer *tx,
/* Does a CPU wait for the buffer's backing data to become reliably accessible
* for write/read by waiting on the buffer's relevant fences.
*/
-static INLINE boolean
+static inline bool
nouveau_buffer_sync(struct nv04_resource *buf, unsigned rw)
{
if (rw == PIPE_TRANSFER_READ) {
if (!buf->fence_wr)
- return TRUE;
+ return true;
NOUVEAU_DRV_STAT_RES(buf, buf_non_kernel_fence_sync_count,
!nouveau_fence_signalled(buf->fence_wr));
if (!nouveau_fence_wait(buf->fence_wr))
- return FALSE;
+ return false;
} else {
if (!buf->fence)
- return TRUE;
+ return true;
NOUVEAU_DRV_STAT_RES(buf, buf_non_kernel_fence_sync_count,
!nouveau_fence_signalled(buf->fence));
if (!nouveau_fence_wait(buf->fence))
- return FALSE;
+ return false;
nouveau_fence_ref(NULL, &buf->fence);
}
nouveau_fence_ref(NULL, &buf->fence_wr);
- return TRUE;
+ return true;
}
-static INLINE boolean
+static inline bool
nouveau_buffer_busy(struct nv04_resource *buf, unsigned rw)
{
if (rw == PIPE_TRANSFER_READ)
@@ -258,7 +253,7 @@ nouveau_buffer_busy(struct nv04_resource *buf, unsigned rw)
return (buf->fence && !nouveau_fence_signalled(buf->fence));
}
-static INLINE void
+static inline void
nouveau_buffer_transfer_init(struct nouveau_transfer *tx,
struct pipe_resource *resource,
const struct pipe_box *box,
@@ -280,7 +275,7 @@ nouveau_buffer_transfer_init(struct nouveau_transfer *tx,
tx->map = NULL;
}
-static INLINE void
+static inline void
nouveau_buffer_transfer_del(struct nouveau_context *nv,
struct nouveau_transfer *tx)
{
@@ -297,11 +292,11 @@ nouveau_buffer_transfer_del(struct nouveau_context *nv,
}
/* Creates a cache in system memory of the buffer data. */
-static boolean
+static bool
nouveau_buffer_cache(struct nouveau_context *nv, struct nv04_resource *buf)
{
struct nouveau_transfer tx;
- boolean ret;
+ bool ret;
tx.base.resource = &buf->base;
tx.base.box.x = 0;
tx.base.box.width = buf->base.width0;
@@ -310,13 +305,13 @@ nouveau_buffer_cache(struct nouveau_context *nv, struct nv04_resource *buf)
if (!buf->data)
if (!nouveau_buffer_malloc(buf))
- return FALSE;
+ return false;
if (!(buf->status & NOUVEAU_BUFFER_STATUS_DIRTY))
- return TRUE;
+ return true;
nv->stats.buf_cache_count++;
- if (!nouveau_transfer_staging(nv, &tx, FALSE))
- return FALSE;
+ if (!nouveau_transfer_staging(nv, &tx, false))
+ return false;
ret = nouveau_transfer_read(nv, &tx);
if (ret) {
@@ -335,15 +330,15 @@ nouveau_buffer_cache(struct nouveau_context *nv, struct nv04_resource *buf)
* resource. This can be useful if we would otherwise have to wait for a read
* operation to complete on this data.
*/
-static INLINE boolean
+static inline bool
nouveau_buffer_should_discard(struct nv04_resource *buf, unsigned usage)
{
if (!(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE))
- return FALSE;
+ return false;
if (unlikely(buf->base.bind & PIPE_BIND_SHARED))
- return FALSE;
+ return false;
if (unlikely(usage & PIPE_TRANSFER_PERSISTENT))
- return FALSE;
+ return false;
return buf->mm && nouveau_buffer_busy(buf, PIPE_TRANSFER_WRITE);
}
@@ -413,7 +408,7 @@ nouveau_buffer_transfer_map(struct pipe_context *pipe,
* back into VRAM on unmap. */
if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)
buf->status &= NOUVEAU_BUFFER_STATUS_REALLOC_MASK;
- nouveau_transfer_staging(nv, tx, TRUE);
+ nouveau_transfer_staging(nv, tx, true);
} else {
if (buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
/* The GPU is currently writing to this buffer. Copy its current
@@ -424,13 +419,13 @@ nouveau_buffer_transfer_map(struct pipe_context *pipe,
align_free(buf->data);
buf->data = NULL;
}
- nouveau_transfer_staging(nv, tx, FALSE);
+ nouveau_transfer_staging(nv, tx, false);
nouveau_transfer_read(nv, tx);
} else {
/* The buffer is currently idle. Create a staging area for writes,
* and make sure that the cached data is up-to-date. */
if (usage & PIPE_TRANSFER_WRITE)
- nouveau_transfer_staging(nv, tx, TRUE);
+ nouveau_transfer_staging(nv, tx, true);
if (!buf->data)
nouveau_buffer_cache(nv, buf);
}
@@ -482,7 +477,7 @@ nouveau_buffer_transfer_map(struct pipe_context *pipe,
if (usage & PIPE_TRANSFER_DISCARD_RANGE) {
/* The whole range is being discarded, so it doesn't matter what was
* there before. No need to copy anything over. */
- nouveau_transfer_staging(nv, tx, TRUE);
+ nouveau_transfer_staging(nv, tx, true);
map = tx->map;
} else
if (nouveau_buffer_busy(buf, PIPE_TRANSFER_READ)) {
@@ -493,7 +488,7 @@ nouveau_buffer_transfer_map(struct pipe_context *pipe,
} else {
/* It is expected that the returned buffer be a representation of the
* data in question, so we must copy it over from the buffer. */
- nouveau_transfer_staging(nv, tx, TRUE);
+ nouveau_transfer_staging(nv, tx, true);
if (tx->map)
memcpy(tx->map, map, box->width);
map = tx->map;
@@ -544,7 +539,7 @@ nouveau_buffer_transfer_unmap(struct pipe_context *pipe,
const uint8_t bind = buf->base.bind;
/* make sure we invalidate dedicated caches */
if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER))
- nv->vbo_dirty = TRUE;
+ nv->vbo_dirty = true;
}
util_range_add(&buf->valid_buffer_range,
@@ -639,7 +634,7 @@ nouveau_buffer_create(struct pipe_screen *pscreen,
{
struct nouveau_screen *screen = nouveau_screen(pscreen);
struct nv04_resource *buffer;
- boolean ret;
+ bool ret;
buffer = CALLOC_STRUCT(nv04_resource);
if (!buffer)
@@ -683,7 +678,7 @@ nouveau_buffer_create(struct pipe_screen *pscreen,
}
ret = nouveau_buffer_allocate(screen, buffer, buffer->domain);
- if (ret == FALSE)
+ if (ret == false)
goto fail;
if (buffer->domain == NOUVEAU_BO_VRAM && screen->hint_buf_keep_sysmem_copy)
@@ -730,20 +725,20 @@ nouveau_user_buffer_create(struct pipe_screen *pscreen, void *ptr,
return &buffer->base;
}
-static INLINE boolean
+static inline bool
nouveau_buffer_data_fetch(struct nouveau_context *nv, struct nv04_resource *buf,
struct nouveau_bo *bo, unsigned offset, unsigned size)
{
if (!nouveau_buffer_malloc(buf))
- return FALSE;
+ return false;
if (nouveau_bo_map(bo, NOUVEAU_BO_RD, nv->client))
- return FALSE;
+ return false;
memcpy(buf->data, (uint8_t *)bo->map + offset, size);
- return TRUE;
+ return true;
}
/* Migrate a linear buffer (vertex, index, constants) USER -> GART -> VRAM. */
-boolean
+bool
nouveau_buffer_migrate(struct nouveau_context *nv,
struct nv04_resource *buf, const unsigned new_domain)
{
@@ -758,7 +753,7 @@ nouveau_buffer_migrate(struct nouveau_context *nv,
if (new_domain == NOUVEAU_BO_GART && old_domain == 0) {
if (!nouveau_buffer_allocate(screen, buf, new_domain))
- return FALSE;
+ return false;
ret = nouveau_bo_map(buf->bo, 0, nv->client);
if (ret)
return ret;
@@ -771,7 +766,7 @@ nouveau_buffer_migrate(struct nouveau_context *nv,
if (new_domain == NOUVEAU_BO_VRAM) {
/* keep a system memory copy of our data in case we hit a fallback */
if (!nouveau_buffer_data_fetch(nv, buf, buf->bo, buf->offset, size))
- return FALSE;
+ return false;
if (nouveau_mesa_debug)
debug_printf("migrating %u KiB to VRAM\n", size / 1024);
}
@@ -792,28 +787,28 @@ nouveau_buffer_migrate(struct nouveau_context *nv,
if (new_domain == NOUVEAU_BO_VRAM && old_domain == 0) {
struct nouveau_transfer tx;
if (!nouveau_buffer_allocate(screen, buf, NOUVEAU_BO_VRAM))
- return FALSE;
+ return false;
tx.base.resource = &buf->base;
tx.base.box.x = 0;
tx.base.box.width = buf->base.width0;
tx.bo = NULL;
tx.map = NULL;
- if (!nouveau_transfer_staging(nv, &tx, FALSE))
- return FALSE;
+ if (!nouveau_transfer_staging(nv, &tx, false))
+ return false;
nouveau_transfer_write(nv, &tx, 0, tx.base.box.width);
nouveau_buffer_transfer_del(nv, &tx);
} else
- return FALSE;
+ return false;
assert(buf->domain == new_domain);
- return TRUE;
+ return true;
}
/* Migrate data from glVertexAttribPointer(non-VBO) user buffers to GART.
* We'd like to only allocate @size bytes here, but then we'd have to rebase
* the vertex indices ...
*/
-boolean
+bool
nouveau_user_buffer_upload(struct nouveau_context *nv,
struct nv04_resource *buf,
unsigned base, unsigned size)
@@ -825,20 +820,20 @@ nouveau_user_buffer_upload(struct nouveau_context *nv,
buf->base.width0 = base + size;
if (!nouveau_buffer_reallocate(screen, buf, NOUVEAU_BO_GART))
- return FALSE;
+ return false;
ret = nouveau_bo_map(buf->bo, 0, nv->client);
if (ret)
- return FALSE;
+ return false;
memcpy((uint8_t *)buf->bo->map + buf->offset + base, buf->data + base, size);
- return TRUE;
+ return true;
}
/* Scratch data allocation. */
-static INLINE int
+static inline int
nouveau_scratch_bo_alloc(struct nouveau_context *nv, struct nouveau_bo **pbo,
unsigned size)
{
@@ -875,7 +870,7 @@ nouveau_scratch_runout_release(struct nouveau_context *nv)
/* Allocate an extra bo if we can't fit everything we need simultaneously.
* (Could happen for very large user arrays.)
*/
-static INLINE boolean
+static inline bool
nouveau_scratch_runout(struct nouveau_context *nv, unsigned size)
{
int ret;
@@ -909,7 +904,7 @@ nouveau_scratch_runout(struct nouveau_context *nv, unsigned size)
/* Continue to next scratch buffer, if available (no wrapping, large enough).
* Allocate it if it has not yet been created.
*/
-static INLINE boolean
+static inline bool
nouveau_scratch_next(struct nouveau_context *nv, unsigned size)
{
struct nouveau_bo *bo;
@@ -917,14 +912,14 @@ nouveau_scratch_next(struct nouveau_context *nv, unsigned size)
const unsigned i = (nv->scratch.id + 1) % NOUVEAU_MAX_SCRATCH_BUFS;
if ((size > nv->scratch.bo_size) || (i == nv->scratch.wrap))
- return FALSE;
+ return false;
nv->scratch.id = i;
bo = nv->scratch.bo[i];
if (!bo) {
ret = nouveau_scratch_bo_alloc(nv, &bo, nv->scratch.bo_size);
if (ret)
- return FALSE;
+ return false;
nv->scratch.bo[i] = bo;
}
nv->scratch.current = bo;
@@ -937,10 +932,10 @@ nouveau_scratch_next(struct nouveau_context *nv, unsigned size)
return !ret;
}
-static boolean
+static bool
nouveau_scratch_more(struct nouveau_context *nv, unsigned min_size)
{
- boolean ret;
+ bool ret;
ret = nouveau_scratch_next(nv, min_size);
if (!ret)
diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.h b/src/gallium/drivers/nouveau/nouveau_buffer.h
index de77f481da3..7e6a6cc804b 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.h
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.h
@@ -58,7 +58,7 @@ nouveau_copy_buffer(struct nouveau_context *,
struct nv04_resource *dst, unsigned dst_pos,
struct nv04_resource *src, unsigned src_pos, unsigned size);
-boolean
+bool
nouveau_buffer_migrate(struct nouveau_context *,
struct nv04_resource *, unsigned domain);
@@ -66,20 +66,20 @@ void *
nouveau_resource_map_offset(struct nouveau_context *, struct nv04_resource *,
uint32_t offset, uint32_t flags);
-static INLINE void
+static inline void
nouveau_resource_unmap(struct nv04_resource *res)
{
/* no-op */
}
-static INLINE struct nv04_resource *
+static inline struct nv04_resource *
nv04_resource(struct pipe_resource *resource)
{
return (struct nv04_resource *)resource;
}
/* is resource mapped into the GPU's address space (i.e. VRAM or GART) ? */
-static INLINE boolean
+static inline bool
nouveau_resource_mapped_by_gpu(struct pipe_resource *resource)
{
return nv04_resource(resource)->domain != 0;
@@ -93,7 +93,7 @@ struct pipe_resource *
nouveau_user_buffer_create(struct pipe_screen *screen, void *ptr,
unsigned bytes, unsigned usage);
-boolean
+bool
nouveau_user_buffer_upload(struct nouveau_context *, struct nv04_resource *,
unsigned base, unsigned size);
diff --git a/src/gallium/drivers/nouveau/nouveau_context.h b/src/gallium/drivers/nouveau/nouveau_context.h
index c2ba0159afe..24deb7ee4c0 100644
--- a/src/gallium/drivers/nouveau/nouveau_context.h
+++ b/src/gallium/drivers/nouveau/nouveau_context.h
@@ -13,7 +13,7 @@ struct nouveau_context {
struct nouveau_client *client;
struct nouveau_pushbuf *pushbuf;
- boolean vbo_dirty;
+ bool vbo_dirty;
void (*copy_data)(struct nouveau_context *,
struct nouveau_bo *dst, unsigned, unsigned,
@@ -53,7 +53,7 @@ struct nouveau_context {
} stats;
};
-static INLINE struct nouveau_context *
+static inline struct nouveau_context *
nouveau_context(struct pipe_context *pipe)
{
return (struct nouveau_context *)pipe;
@@ -69,7 +69,7 @@ nouveau_scratch_runout_release(struct nouveau_context *);
* because we don't want to un-bo_ref each allocation every time. This is less
* work, and we need the wrap index anyway for extreme situations.
*/
-static INLINE void
+static inline void
nouveau_scratch_done(struct nouveau_context *nv)
{
nv->scratch.wrap = nv->scratch.id;
@@ -84,7 +84,7 @@ void *
nouveau_scratch_get(struct nouveau_context *, unsigned size, uint64_t *gpu_addr,
struct nouveau_bo **);
-static INLINE void
+static inline void
nouveau_context_destroy(struct nouveau_context *ctx)
{
int i;
@@ -96,7 +96,7 @@ nouveau_context_destroy(struct nouveau_context *ctx)
FREE(ctx);
}
-static INLINE void
+static inline void
nouveau_context_update_frame_stats(struct nouveau_context *nv)
{
nv->stats.buf_cache_frame <<= 1;
@@ -104,7 +104,7 @@ nouveau_context_update_frame_stats(struct nouveau_context *nv)
nv->stats.buf_cache_count = 0;
nv->stats.buf_cache_frame |= 1;
if ((nv->stats.buf_cache_frame & 0xf) == 0xf)
- nv->screen->hint_buf_keep_sysmem_copy = TRUE;
+ nv->screen->hint_buf_keep_sysmem_copy = true;
}
}
diff --git a/src/gallium/drivers/nouveau/nouveau_fence.c b/src/gallium/drivers/nouveau/nouveau_fence.c
index 17a5174594d..abcdb479954 100644
--- a/src/gallium/drivers/nouveau/nouveau_fence.c
+++ b/src/gallium/drivers/nouveau/nouveau_fence.c
@@ -28,13 +28,13 @@
#include <sched.h>
#endif
-boolean
+bool
nouveau_fence_new(struct nouveau_screen *screen, struct nouveau_fence **fence,
- boolean emit)
+ bool emit)
{
*fence = CALLOC_STRUCT(nouveau_fence);
if (!*fence)
- return FALSE;
+ return false;
(*fence)->screen = screen;
(*fence)->ref = 1;
@@ -43,7 +43,7 @@ nouveau_fence_new(struct nouveau_screen *screen, struct nouveau_fence **fence,
if (emit)
nouveau_fence_emit(*fence);
- return TRUE;
+ return true;
}
static void
@@ -58,7 +58,7 @@ nouveau_fence_trigger_work(struct nouveau_fence *fence)
}
}
-boolean
+bool
nouveau_fence_work(struct nouveau_fence *fence,
void (*func)(void *), void *data)
{
@@ -66,16 +66,16 @@ nouveau_fence_work(struct nouveau_fence *fence,
if (!fence || fence->state == NOUVEAU_FENCE_STATE_SIGNALLED) {
func(data);
- return TRUE;
+ return true;
}
work = CALLOC_STRUCT(nouveau_fence_work);
if (!work)
- return FALSE;
+ return false;
work->func = func;
work->data = data;
LIST_ADD(&work->list, &fence->work);
- return TRUE;
+ return true;
}
void
@@ -132,7 +132,7 @@ nouveau_fence_del(struct nouveau_fence *fence)
}
void
-nouveau_fence_update(struct nouveau_screen *screen, boolean flushed)
+nouveau_fence_update(struct nouveau_screen *screen, bool flushed)
{
struct nouveau_fence *fence;
struct nouveau_fence *next = NULL;
@@ -167,21 +167,21 @@ nouveau_fence_update(struct nouveau_screen *screen, boolean flushed)
#define NOUVEAU_FENCE_MAX_SPINS (1 << 31)
-boolean
+bool
nouveau_fence_signalled(struct nouveau_fence *fence)
{
struct nouveau_screen *screen = fence->screen;
if (fence->state == NOUVEAU_FENCE_STATE_SIGNALLED)
- return TRUE;
+ return true;
if (fence->state >= NOUVEAU_FENCE_STATE_EMITTED)
- nouveau_fence_update(screen, FALSE);
+ nouveau_fence_update(screen, false);
return fence->state == NOUVEAU_FENCE_STATE_SIGNALLED;
}
-boolean
+bool
nouveau_fence_wait(struct nouveau_fence *fence)
{
struct nouveau_screen *screen = fence->screen;
@@ -195,16 +195,16 @@ nouveau_fence_wait(struct nouveau_fence *fence)
if (fence->state < NOUVEAU_FENCE_STATE_FLUSHED)
if (nouveau_pushbuf_kick(screen->pushbuf, screen->pushbuf->channel))
- return FALSE;
+ return false;
if (fence == screen->fence.current)
nouveau_fence_next(screen);
do {
- nouveau_fence_update(screen, FALSE);
+ nouveau_fence_update(screen, false);
if (fence->state == NOUVEAU_FENCE_STATE_SIGNALLED)
- return TRUE;
+ return true;
if (!spins)
NOUVEAU_DRV_STAT(screen, any_non_kernel_fence_sync_count, 1);
spins++;
@@ -218,7 +218,7 @@ nouveau_fence_wait(struct nouveau_fence *fence)
fence->sequence,
screen->fence.sequence_ack, screen->fence.sequence);
- return FALSE;
+ return false;
}
void
@@ -229,5 +229,5 @@ nouveau_fence_next(struct nouveau_screen *screen)
nouveau_fence_ref(NULL, &screen->fence.current);
- nouveau_fence_new(screen, &screen->fence.current, FALSE);
+ nouveau_fence_new(screen, &screen->fence.current, false);
}
diff --git a/src/gallium/drivers/nouveau/nouveau_fence.h b/src/gallium/drivers/nouveau/nouveau_fence.h
index 7bb132a5d15..a1587051b0f 100644
--- a/src/gallium/drivers/nouveau/nouveau_fence.h
+++ b/src/gallium/drivers/nouveau/nouveau_fence.h
@@ -29,15 +29,15 @@ struct nouveau_fence {
void nouveau_fence_emit(struct nouveau_fence *);
void nouveau_fence_del(struct nouveau_fence *);
-boolean nouveau_fence_new(struct nouveau_screen *, struct nouveau_fence **,
- boolean emit);
-boolean nouveau_fence_work(struct nouveau_fence *, void (*)(void *), void *);
-void nouveau_fence_update(struct nouveau_screen *, boolean flushed);
-void nouveau_fence_next(struct nouveau_screen *);
-boolean nouveau_fence_wait(struct nouveau_fence *);
-boolean nouveau_fence_signalled(struct nouveau_fence *);
-
-static INLINE void
+bool nouveau_fence_new(struct nouveau_screen *, struct nouveau_fence **,
+ bool emit);
+bool nouveau_fence_work(struct nouveau_fence *, void (*)(void *), void *);
+void nouveau_fence_update(struct nouveau_screen *, bool flushed);
+void nouveau_fence_next(struct nouveau_screen *);
+bool nouveau_fence_wait(struct nouveau_fence *);
+bool nouveau_fence_signalled(struct nouveau_fence *);
+
+static inline void
nouveau_fence_ref(struct nouveau_fence *fence, struct nouveau_fence **ref)
{
if (fence)
@@ -51,7 +51,7 @@ nouveau_fence_ref(struct nouveau_fence *fence, struct nouveau_fence **ref)
*ref = fence;
}
-static INLINE struct nouveau_fence *
+static inline struct nouveau_fence *
nouveau_fence(struct pipe_fence_handle *fence)
{
return (struct nouveau_fence *)fence;
diff --git a/src/gallium/drivers/nouveau/nouveau_gldefs.h b/src/gallium/drivers/nouveau/nouveau_gldefs.h
index ff97aaa9af0..1538c7b6e57 100644
--- a/src/gallium/drivers/nouveau/nouveau_gldefs.h
+++ b/src/gallium/drivers/nouveau/nouveau_gldefs.h
@@ -1,7 +1,7 @@
#ifndef __NOUVEAU_GLDEFS_H__
#define __NOUVEAU_GLDEFS_H__
-static INLINE unsigned
+static inline unsigned
nvgl_blend_func(unsigned factor)
{
switch (factor) {
@@ -40,7 +40,7 @@ nvgl_blend_func(unsigned factor)
}
}
-static INLINE unsigned
+static inline unsigned
nvgl_blend_eqn(unsigned func)
{
switch (func) {
@@ -59,7 +59,7 @@ nvgl_blend_eqn(unsigned func)
}
}
-static INLINE unsigned
+static inline unsigned
nvgl_logicop_func(unsigned func)
{
switch (func) {
@@ -100,7 +100,7 @@ nvgl_logicop_func(unsigned func)
}
}
-static INLINE unsigned
+static inline unsigned
nvgl_comparison_op(unsigned op)
{
switch (op) {
@@ -125,7 +125,7 @@ nvgl_comparison_op(unsigned op)
}
}
-static INLINE unsigned
+static inline unsigned
nvgl_polygon_mode(unsigned mode)
{
switch (mode) {
@@ -140,7 +140,7 @@ nvgl_polygon_mode(unsigned mode)
}
}
-static INLINE unsigned
+static inline unsigned
nvgl_stencil_op(unsigned op)
{
switch (op) {
@@ -165,7 +165,7 @@ nvgl_stencil_op(unsigned op)
}
}
-static INLINE unsigned
+static inline unsigned
nvgl_primitive(unsigned prim) {
switch (prim) {
case PIPE_PRIM_POINTS:
diff --git a/src/gallium/drivers/nouveau/nouveau_mm.c b/src/gallium/drivers/nouveau/nouveau_mm.c
index 9c454c56db0..43b3d99f48a 100644
--- a/src/gallium/drivers/nouveau/nouveau_mm.c
+++ b/src/gallium/drivers/nouveau/nouveau_mm.c
@@ -70,7 +70,7 @@ mm_slab_alloc(struct mm_slab *slab)
return -1;
}
-static INLINE void
+static inline void
mm_slab_free(struct mm_slab *slab, int i)
{
assert(i < slab->count);
@@ -79,7 +79,7 @@ mm_slab_free(struct mm_slab *slab, int i)
assert(slab->free <= slab->count);
}
-static INLINE int
+static inline int
mm_get_order(uint32_t size)
{
int s = __builtin_clz(size) ^ 31;
@@ -104,7 +104,7 @@ mm_bucket_by_size(struct nouveau_mman *cache, unsigned size)
}
/* size of bo allocation for slab with chunks of (1 << chunk_order) bytes */
-static INLINE uint32_t
+static inline uint32_t
mm_default_slab_size(unsigned chunk_order)
{
static const int8_t slab_order[MM_MAX_ORDER - MM_MIN_ORDER + 1] =
@@ -263,7 +263,7 @@ nouveau_mm_create(struct nouveau_device *dev, uint32_t domain,
return cache;
}
-static INLINE void
+static inline void
nouveau_mm_free_slabs(struct list_head *head)
{
struct mm_slab *slab, *next;
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c
index c6e5074db19..b2290e7e784 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -68,17 +68,13 @@ nouveau_screen_fence_ref(struct pipe_screen *pscreen,
}
static boolean
-nouveau_screen_fence_signalled(struct pipe_screen *screen,
- struct pipe_fence_handle *pfence)
-{
- return nouveau_fence_signalled(nouveau_fence(pfence));
-}
-
-static boolean
nouveau_screen_fence_finish(struct pipe_screen *screen,
struct pipe_fence_handle *pfence,
uint64_t timeout)
{
+ if (!timeout)
+ return nouveau_fence_signalled(nouveau_fence(pfence));
+
return nouveau_fence_wait(nouveau_fence(pfence));
}
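The removal of nouveau_screen_fence_signalled() above relies on the gallium convention that fence_finish() with a zero timeout is a non-blocking query: the poll is folded into the wait entry point, and the separate fence_signalled hook is dropped from the screen vtable a few hunks below. A minimal model of the resulting dispatch:

    #include <cstdint>

    // timeout == 0 degenerates into the old signalled() poll; anything
    // else falls through to the blocking wait, as in the hunk above.
    static bool fence_finish_model(uint64_t timeout,
                                   bool (*signalled)(void *),
                                   bool (*wait)(void *), void *fence)
    {
       return timeout ? wait(fence) : signalled(fence);
    }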
@@ -115,7 +111,7 @@ nouveau_screen_bo_from_handle(struct pipe_screen *pscreen,
}
-boolean
+bool
nouveau_screen_bo_get_handle(struct pipe_screen *pscreen,
struct nouveau_bo *bo,
unsigned stride,
@@ -127,11 +123,11 @@ nouveau_screen_bo_get_handle(struct pipe_screen *pscreen,
return nouveau_bo_name_get(bo, &whandle->handle) == 0;
} else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) {
whandle->handle = bo->handle;
- return TRUE;
+ return true;
} else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
return nouveau_bo_set_prime(bo, (int *)&whandle->handle) == 0;
} else {
- return FALSE;
+ return false;
}
}
@@ -203,7 +199,6 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
pscreen->get_timestamp = nouveau_screen_get_timestamp;
pscreen->fence_reference = nouveau_screen_fence_ref;
- pscreen->fence_signalled = nouveau_screen_fence_signalled;
pscreen->fence_finish = nouveau_screen_fence_finish;
util_format_s3tc_init();
@@ -214,7 +209,8 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT |
PIPE_BIND_CURSOR |
PIPE_BIND_SAMPLER_VIEW |
- PIPE_BIND_SHADER_RESOURCE | PIPE_BIND_COMPUTE_RESOURCE |
+ PIPE_BIND_SHADER_BUFFER | PIPE_BIND_SHADER_IMAGE |
+ PIPE_BIND_COMPUTE_RESOURCE |
PIPE_BIND_GLOBAL;
screen->sysmem_bindings =
PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_STREAM_OUTPUT |
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h
index 30041b271c9..4fdde9fbf3d 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.h
+++ b/src/gallium/drivers/nouveau/nouveau_screen.h
@@ -49,7 +49,7 @@ struct nouveau_screen {
int64_t cpu_gpu_time_delta;
- boolean hint_buf_keep_sysmem_copy;
+ bool hint_buf_keep_sysmem_copy;
unsigned vram_domain;
@@ -112,15 +112,15 @@ struct nouveau_screen {
# define NOUVEAU_DRV_STAT_IFD(x)
#endif
-static INLINE struct nouveau_screen *
+static inline struct nouveau_screen *
nouveau_screen(struct pipe_screen *pscreen)
{
return (struct nouveau_screen *)pscreen;
}
-boolean nouveau_drm_screen_unref(struct nouveau_screen *screen);
+bool nouveau_drm_screen_unref(struct nouveau_screen *screen);
-boolean
+bool
nouveau_screen_bo_get_handle(struct pipe_screen *pscreen,
struct nouveau_bo *bo,
unsigned stride,
diff --git a/src/gallium/drivers/nouveau/nouveau_statebuf.h b/src/gallium/drivers/nouveau/nouveau_statebuf.h
index 4f8bd7bdf16..f38014091ba 100644
--- a/src/gallium/drivers/nouveau/nouveau_statebuf.h
+++ b/src/gallium/drivers/nouveau/nouveau_statebuf.h
@@ -20,7 +20,7 @@ struct nouveau_statebuf_builder
#define sb_data(sb, v) *(sb).p++ = (v)
#endif
-static INLINE uint32_t sb_header(unsigned subc, unsigned mthd, unsigned size)
+static inline uint32_t sb_header(unsigned subc, unsigned mthd, unsigned size)
{
return (size << 18) | (subc << 13) | mthd;
}
diff --git a/src/gallium/drivers/nouveau/nouveau_video.c b/src/gallium/drivers/nouveau/nouveau_video.c
index d6330fa63a8..e414a534418 100644
--- a/src/gallium/drivers/nouveau/nouveau_video.c
+++ b/src/gallium/drivers/nouveau/nouveau_video.c
@@ -100,7 +100,7 @@ nouveau_vpe_fini(struct nouveau_decoder *dec) {
dec->current = dec->future = dec->past = 8;
}
-static INLINE void
+static inline void
nouveau_vpe_mb_dct_blocks(struct nouveau_decoder *dec, const struct pipe_mpeg12_macroblock *mb)
{
int cbb;
@@ -125,7 +125,7 @@ nouveau_vpe_mb_dct_blocks(struct nouveau_decoder *dec, const struct pipe_mpeg12_
}
}
-static INLINE void
+static inline void
nouveau_vpe_mb_data_blocks(struct nouveau_decoder *dec, const struct pipe_mpeg12_macroblock *mb)
{
int cbb;
@@ -143,7 +143,7 @@ nouveau_vpe_mb_data_blocks(struct nouveau_decoder *dec, const struct pipe_mpeg12
}
}
-static INLINE void
+static inline void
nouveau_vpe_mb_dct_header(struct nouveau_decoder *dec,
const struct pipe_mpeg12_macroblock *mb,
bool luma)
@@ -187,7 +187,7 @@ nouveau_vpe_mb_dct_header(struct nouveau_decoder *dec,
x | (y << NV17_MPEG_CMD_MB_COORDS_Y__SHIFT));
}
-static INLINE unsigned int
+static inline unsigned int
nouveau_vpe_mb_mv_flags(bool luma, int mv_h, int mv_v, bool forward, bool first, bool vert)
{
unsigned mc_header = 0;
@@ -228,7 +228,7 @@ static int div_up(int val, int mult) {
return val / mult;
}
-static INLINE void
+static inline void
nouveau_vpe_mb_mv(struct nouveau_decoder *dec, unsigned mc_header,
bool luma, bool frame, bool forward, bool vert,
int x, int y, const short motions[2],
@@ -296,16 +296,16 @@ nouveau_vpe_mb_mv_header(struct nouveau_decoder *dec,
case PIPE_MPEG12_MO_TYPE_DUAL_PRIME: {
base = NV17_MPEG_CMD_CHROMA_MV_HEADER_COUNT_2;
if (forward) {
- nouveau_vpe_mb_mv(dec, base, luma, frame, TRUE, FALSE,
- x, y, mb->PMV[0][0], dec->past, TRUE);
- nouveau_vpe_mb_mv(dec, base, luma, frame, TRUE, TRUE,
- x, y2, mb->PMV[0][0], dec->past, FALSE);
+ nouveau_vpe_mb_mv(dec, base, luma, frame, true, false,
+ x, y, mb->PMV[0][0], dec->past, true);
+ nouveau_vpe_mb_mv(dec, base, luma, frame, true, true,
+ x, y2, mb->PMV[0][0], dec->past, false);
}
if (backward && forward) {
- nouveau_vpe_mb_mv(dec, base, luma, frame, !forward, TRUE,
- x, y, mb->PMV[1][0], dec->future, TRUE);
- nouveau_vpe_mb_mv(dec, base, luma, frame, !forward, FALSE,
- x, y2, mb->PMV[1][1], dec->future, FALSE);
+ nouveau_vpe_mb_mv(dec, base, luma, frame, !forward, true,
+ x, y, mb->PMV[1][0], dec->future, true);
+ nouveau_vpe_mb_mv(dec, base, luma, frame, !forward, false,
+ x, y2, mb->PMV[1][1], dec->future, false);
} else assert(!backward);
break;
}
@@ -320,13 +320,13 @@ nouveau_vpe_mb_mv_header(struct nouveau_decoder *dec,
if (frame)
base |= NV17_MPEG_CMD_CHROMA_MV_HEADER_TYPE_FRAME;
if (forward)
- nouveau_vpe_mb_mv(dec, base, luma, frame, TRUE,
+ nouveau_vpe_mb_mv(dec, base, luma, frame, true,
dec->picture_structure != PIPE_MPEG12_PICTURE_STRUCTURE_FIELD_TOP,
- x, y, mb->PMV[0][0], dec->past, TRUE);
+ x, y, mb->PMV[0][0], dec->past, true);
if (backward && forward)
- nouveau_vpe_mb_mv(dec, base, luma, frame, FALSE,
+ nouveau_vpe_mb_mv(dec, base, luma, frame, false,
dec->picture_structure == PIPE_MPEG12_PICTURE_STRUCTURE_FIELD_TOP,
- x, y, mb->PMV[0][1], dec->future, TRUE);
+ x, y, mb->PMV[0][1], dec->future, true);
else assert(!backward);
break;
}
@@ -341,11 +341,11 @@ mv1:
base |= NV17_MPEG_CMD_CHROMA_MV_HEADER_TYPE_FRAME;
/* frame 16x16 */
if (forward)
- nouveau_vpe_mb_mv(dec, base, luma, frame, TRUE, FALSE,
- x, y, mb->PMV[0][0], dec->past, TRUE);
+ nouveau_vpe_mb_mv(dec, base, luma, frame, true, false,
+ x, y, mb->PMV[0][0], dec->past, true);
if (backward)
- nouveau_vpe_mb_mv(dec, base, luma, frame, !forward, FALSE,
- x, y, mb->PMV[0][1], dec->future, TRUE);
+ nouveau_vpe_mb_mv(dec, base, luma, frame, !forward, false,
+ x, y, mb->PMV[0][1], dec->future, true);
return;
mv2:
@@ -353,20 +353,20 @@ mv2:
if (!frame)
base |= NV17_MPEG_CMD_CHROMA_MV_HEADER_MV_SPLIT_HALF_MB;
if (forward) {
- nouveau_vpe_mb_mv(dec, base, luma, frame, TRUE,
+ nouveau_vpe_mb_mv(dec, base, luma, frame, true,
mb->motion_vertical_field_select & PIPE_MPEG12_FS_FIRST_FORWARD,
- x, y, mb->PMV[0][0], dec->past, TRUE);
- nouveau_vpe_mb_mv(dec, base, luma, frame, TRUE,
+ x, y, mb->PMV[0][0], dec->past, true);
+ nouveau_vpe_mb_mv(dec, base, luma, frame, true,
mb->motion_vertical_field_select & PIPE_MPEG12_FS_SECOND_FORWARD,
- x, y2, mb->PMV[1][0], dec->past, FALSE);
+ x, y2, mb->PMV[1][0], dec->past, false);
}
if (backward) {
nouveau_vpe_mb_mv(dec, base, luma, frame, !forward,
mb->motion_vertical_field_select & PIPE_MPEG12_FS_FIRST_BACKWARD,
- x, y, mb->PMV[0][1], dec->future, TRUE);
+ x, y, mb->PMV[0][1], dec->future, true);
nouveau_vpe_mb_mv(dec, base, luma, frame, !forward,
mb->motion_vertical_field_select & PIPE_MPEG12_FS_SECOND_BACKWARD,
- x, y2, mb->PMV[1][1], dec->future, FALSE);
+ x, y2, mb->PMV[1][1], dec->future, false);
}
}
@@ -438,14 +438,14 @@ nouveau_decoder_decode_macroblock(struct pipe_video_codec *decoder,
mb = (const struct pipe_mpeg12_macroblock *)pipe_mb;
for (i = 0; i < num_macroblocks; ++i, mb++) {
if (mb->macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA) {
- nouveau_vpe_mb_dct_header(dec, mb, TRUE);
- nouveau_vpe_mb_dct_header(dec, mb, FALSE);
+ nouveau_vpe_mb_dct_header(dec, mb, true);
+ nouveau_vpe_mb_dct_header(dec, mb, false);
} else {
- nouveau_vpe_mb_mv_header(dec, mb, TRUE);
- nouveau_vpe_mb_dct_header(dec, mb, TRUE);
+ nouveau_vpe_mb_mv_header(dec, mb, true);
+ nouveau_vpe_mb_dct_header(dec, mb, true);
- nouveau_vpe_mb_mv_header(dec, mb, FALSE);
- nouveau_vpe_mb_dct_header(dec, mb, FALSE);
+ nouveau_vpe_mb_mv_header(dec, mb, false);
+ nouveau_vpe_mb_dct_header(dec, mb, false);
}
if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
nouveau_vpe_mb_dct_blocks(dec, mb);
diff --git a/src/gallium/drivers/nouveau/nouveau_video.h b/src/gallium/drivers/nouveau/nouveau_video.h
index 08d48b371fd..fd1bd527deb 100644
--- a/src/gallium/drivers/nouveau/nouveau_video.h
+++ b/src/gallium/drivers/nouveau/nouveau_video.h
@@ -45,7 +45,7 @@ struct nouveau_decoder {
#define NV31_VIDEO_BIND_CMD NV31_MPEG_IMAGE_Y_OFFSET__LEN
#define NV31_VIDEO_BIND_COUNT (NV31_MPEG_IMAGE_Y_OFFSET__LEN + 1)
-static INLINE void
+static inline void
nouveau_vpe_write(struct nouveau_decoder *dec, unsigned data) {
dec->cmds[dec->ofs++] = data;
}
@@ -54,33 +54,33 @@ nouveau_vpe_write(struct nouveau_decoder *dec, unsigned data) {
#define NV31_MPEG(mthd) SUBC_MPEG(NV31_MPEG_##mthd)
#define NV84_MPEG(mthd) SUBC_MPEG(NV84_MPEG_##mthd)
-static INLINE uint32_t
+static inline uint32_t
NV04_FIFO_PKHDR(int subc, int mthd, unsigned size)
{
return 0x00000000 | (size << 18) | (subc << 13) | mthd;
}
-static INLINE uint32_t
+static inline uint32_t
NV04_FIFO_PKHDR_NI(int subc, int mthd, unsigned size)
{
return 0x40000000 | (size << 18) | (subc << 13) | mthd;
}
-static INLINE void
+static inline void
BEGIN_NV04(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
{
PUSH_SPACE(push, size + 1);
PUSH_DATA (push, NV04_FIFO_PKHDR(subc, mthd, size));
}
-static INLINE void
+static inline void
BEGIN_NI04(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
{
PUSH_SPACE(push, size + 1);
PUSH_DATA (push, NV04_FIFO_PKHDR_NI(subc, mthd, size));
}
-static INLINE void
+static inline void
PUSH_MTHDl(struct nouveau_pushbuf *push, int subc, int mthd,
struct nouveau_bo *bo, uint32_t offset,
struct nouveau_bufctx *ctx, int bin, uint32_t rw)
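
The two NV04_FIFO_PKHDR helpers above pack a FIFO packet header as (size << 18) | (subc << 13) | mthd, with the _NI variant setting bit 30 (0x40000000) to mark the packet non-incrementing. A minimal standalone sketch of that packing, with the field widths inferred from the shifts (they are not stated in the header itself):

#include <assert.h>
#include <stdint.h>

/* Sketch of the NV04 FIFO packet header layout assumed above:
 * method count from bit 18 up, subchannel in bits 13-15, method
 * offset in the low 13 bits. */
static uint32_t pkhdr(int subc, int mthd, unsigned size)
{
   return (size << 18) | (subc << 13) | mthd;
}

int main(void)
{
   uint32_t hdr = pkhdr(1, 0x0304, 2);
   assert(((hdr >> 18) & 0x7ff) == 2);   /* method count */
   assert(((hdr >> 13) & 0x7) == 1);     /* subchannel   */
   assert((hdr & 0x1fff) == 0x0304);     /* method       */
   return 0;
}
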
diff --git a/src/gallium/drivers/nouveau/nouveau_vp3_video.h b/src/gallium/drivers/nouveau/nouveau_vp3_video.h
index 279a1ce18ef..33e3bef3df3 100644
--- a/src/gallium/drivers/nouveau/nouveau_vp3_video.h
+++ b/src/gallium/drivers/nouveau/nouveau_vp3_video.h
@@ -135,22 +135,22 @@ struct comm {
uint32_t parse_endpos[0x10]; // 1c0
};
-static INLINE uint32_t nouveau_vp3_video_align(uint32_t h)
+static inline uint32_t nouveau_vp3_video_align(uint32_t h)
{
return ((h+0x3f)&~0x3f);
};
-static INLINE uint32_t mb(uint32_t coord)
+static inline uint32_t mb(uint32_t coord)
{
return (coord + 0xf)>>4;
}
-static INLINE uint32_t mb_half(uint32_t coord)
+static inline uint32_t mb_half(uint32_t coord)
{
return (coord + 0x1f)>>5;
}
-static INLINE uint64_t
+static inline uint64_t
nouveau_vp3_video_addr(struct nouveau_vp3_decoder *dec, struct nouveau_vp3_video_buffer *target)
{
uint64_t ret;
@@ -161,7 +161,7 @@ nouveau_vp3_video_addr(struct nouveau_vp3_decoder *dec, struct nouveau_vp3_video
return dec->ref_bo->offset + ret;
}
-static INLINE void
+static inline void
nouveau_vp3_ycbcr_offsets(struct nouveau_vp3_decoder *dec, uint32_t *y2,
uint32_t *cbcr, uint32_t *cbcr2)
{
@@ -182,7 +182,7 @@ nouveau_vp3_ycbcr_offsets(struct nouveau_vp3_decoder *dec, uint32_t *y2,
}
}
-static INLINE void
+static inline void
nouveau_vp3_inter_sizes(struct nouveau_vp3_decoder *dec, uint32_t slice_count,
uint32_t *slice_size, uint32_t *bucket_size,
uint32_t *ring_size)
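
The helpers above round video dimensions up to hardware units: mb() to 16-pixel macroblocks, mb_half() to 32-pixel halves, and nouveau_vp3_video_align() to a 64-line boundary. A tiny standalone version of the same arithmetic, with 1080p as a worked example:

#include <stdint.h>
#include <stdio.h>

/* Same rounding as the inline helpers above. */
static uint32_t mb(uint32_t coord)      { return (coord + 0xf) >> 4; }
static uint32_t mb_half(uint32_t coord) { return (coord + 0x1f) >> 5; }
static uint32_t align64(uint32_t h)     { return (h + 0x3f) & ~0x3f; }

int main(void)
{
   /* A 1920x1088-coded 1080p frame: 120 x 68 macroblocks. */
   printf("%u x %u macroblocks\n", mb(1920), mb(1088));
   printf("half units: %u, aligned height: %u\n",
          mb_half(1080), align64(1080)); /* 34, 1088 */
   return 0;
}
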
diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h
index 51effb1d8d2..389a229eb78 100644
--- a/src/gallium/drivers/nouveau/nouveau_winsys.h
+++ b/src/gallium/drivers/nouveau/nouveau_winsys.h
@@ -15,34 +15,34 @@
#define NOUVEAU_MIN_BUFFER_MAP_ALIGN 64
#define NOUVEAU_MIN_BUFFER_MAP_ALIGN_MASK (NOUVEAU_MIN_BUFFER_MAP_ALIGN - 1)
-static INLINE uint32_t
+static inline uint32_t
PUSH_AVAIL(struct nouveau_pushbuf *push)
{
return push->end - push->cur;
}
-static INLINE boolean
+static inline bool
PUSH_SPACE(struct nouveau_pushbuf *push, uint32_t size)
{
if (PUSH_AVAIL(push) < size)
return nouveau_pushbuf_space(push, size, 0, 0) == 0;
- return TRUE;
+ return true;
}
-static INLINE void
+static inline void
PUSH_DATA(struct nouveau_pushbuf *push, uint32_t data)
{
*push->cur++ = data;
}
-static INLINE void
+static inline void
PUSH_DATAp(struct nouveau_pushbuf *push, const void *data, uint32_t size)
{
memcpy(push->cur, data, size * 4);
push->cur += size;
}
-static INLINE void
+static inline void
PUSH_DATAf(struct nouveau_pushbuf *push, float f)
{
union { float f; uint32_t i; } u;
@@ -50,7 +50,7 @@ PUSH_DATAf(struct nouveau_pushbuf *push, float f)
PUSH_DATA(push, u.i);
}
-static INLINE void
+static inline void
PUSH_KICK(struct nouveau_pushbuf *push)
{
nouveau_pushbuf_kick(push, push->channel);
@@ -60,7 +60,7 @@ PUSH_KICK(struct nouveau_pushbuf *push)
#define NOUVEAU_RESOURCE_FLAG_LINEAR (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
#define NOUVEAU_RESOURCE_FLAG_DRV_PRIV (PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
-static INLINE uint32_t
+static inline uint32_t
nouveau_screen_transfer_flags(unsigned pipe)
{
uint32_t flags = 0;
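
PUSH_DATAf above submits a float to the pushbuf by reinterpreting its bit pattern as a uint32_t through a union, which sidesteps the strict-aliasing problems a pointer cast would invite. A self-contained sketch of the same trick:

#include <stdint.h>
#include <stdio.h>

/* Reinterpret a float's bits as a 32-bit word, as PUSH_DATAf does. */
static uint32_t float_bits(float f)
{
   union { float f; uint32_t i; } u;
   u.f = f;
   return u.i;
}

int main(void)
{
   printf("1.0f -> 0x%08x\n", float_bits(1.0f)); /* prints 0x3f800000 */
   return 0;
}
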
diff --git a/src/gallium/drivers/nouveau/nv30/nv30-40_3d.xml.h b/src/gallium/drivers/nouveau/nv30/nv30-40_3d.xml.h
index 447f4b3b7ae..95468e580dd 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30-40_3d.xml.h
+++ b/src/gallium/drivers/nouveau/nv30/nv30-40_3d.xml.h
@@ -1459,6 +1459,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV40_3D_VTX_CACHE_INVALIDATE 0x00001714
+#define NV40_3D_VB_ELEMENT_BASE 0x0000173c
+
#define NV30_3D_VTXFMT(i0) (0x00001740 + 0x4*(i0))
#define NV30_3D_VTXFMT__ESIZE 0x00000004
#define NV30_3D_VTXFMT__LEN 0x00000010
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_clear.c b/src/gallium/drivers/nouveau/nv30/nv30_clear.c
index 83fd1fa38dd..118cac77277 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_clear.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_clear.c
@@ -32,7 +32,7 @@
#include "nv30/nv30_context.h"
#include "nv30/nv30_format.h"
-static INLINE uint32_t
+static inline uint32_t
pack_rgba(enum pipe_format format, const float *rgba)
{
union util_color uc;
@@ -40,7 +40,7 @@ pack_rgba(enum pipe_format format, const float *rgba)
return uc.ui[0];
}
-static INLINE uint32_t
+static inline uint32_t
pack_zeta(enum pipe_format format, double depth, unsigned stencil)
{
uint32_t zuint = (uint32_t)(depth * 4294967295.0);
@@ -58,7 +58,7 @@ nv30_clear(struct pipe_context *pipe, unsigned buffers,
struct pipe_framebuffer_state *fb = &nv30->framebuffer;
uint32_t colr = 0, zeta = 0, mode = 0;
- if (!nv30_state_validate(nv30, NV30_NEW_FRAMEBUFFER | NV30_NEW_SCISSOR, TRUE))
+ if (!nv30_state_validate(nv30, NV30_NEW_FRAMEBUFFER | NV30_NEW_SCISSOR, true))
return;
if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_context.c b/src/gallium/drivers/nouveau/nv30/nv30_context.c
index 617b0887810..6e88ed725d6 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_context.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_context.c
@@ -45,7 +45,7 @@ nv30_context_kick_notify(struct nouveau_pushbuf *push)
screen = &nv30->screen->base;
nouveau_fence_next(screen);
- nouveau_fence_update(screen, TRUE);
+ nouveau_fence_update(screen, true);
if (push->bufctx) {
struct nouveau_bufref *bref;
@@ -165,6 +165,12 @@ nv30_context_destroy(struct pipe_context *pipe)
if (nv30->draw)
draw_destroy(nv30->draw);
+ if (nv30->blit_vp)
+ nouveau_heap_free(&nv30->blit_vp);
+
+ if (nv30->blit_fp)
+ pipe_resource_reference(&nv30->blit_fp, NULL);
+
if (nv30->screen->base.pushbuf->user_priv == &nv30->bufctx)
nv30->screen->base.pushbuf->user_priv = NULL;
@@ -233,7 +239,7 @@ nv30_context_create(struct pipe_screen *pscreen, void *priv)
nv30->config.aniso = NV40_3D_TEX_WRAP_ANISO_MIP_FILTER_OPTIMIZATION_OFF;
- if (debug_get_bool_option("NV30_SWTNL", FALSE))
+ if (debug_get_bool_option("NV30_SWTNL", false))
nv30->draw_flags |= NV30_NEW_SWTNL;
nv30->sample_mask = 0xffff;
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_context.h b/src/gallium/drivers/nouveau/nv30/nv30_context.h
index 592cdbe24f9..d5c18bb62dc 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_context.h
+++ b/src/gallium/drivers/nouveau/nv30/nv30_context.h
@@ -51,7 +51,8 @@ struct nv30_context {
unsigned rt_enable;
unsigned scissor_off;
unsigned num_vtxelts;
- boolean prim_restart;
+ int index_bias;
+ bool prim_restart;
struct nv30_fragprog *fragprog;
} state;
@@ -114,17 +115,17 @@ struct nv30_context {
uint32_t vbo_user;
unsigned vbo_min_index;
unsigned vbo_max_index;
- boolean vbo_push_hint;
+ bool vbo_push_hint;
struct nouveau_heap *blit_vp;
struct pipe_resource *blit_fp;
struct pipe_query *render_cond_query;
unsigned render_cond_mode;
- boolean render_cond_cond;
+ bool render_cond_cond;
};
-static INLINE struct nv30_context *
+static inline struct nv30_context *
nv30_context(struct pipe_context *pipe)
{
return (struct nv30_context *)pipe;
@@ -203,8 +204,8 @@ nv30_draw_init(struct pipe_context *pipe);
void
nv30_render_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info);
-boolean
-nv30_state_validate(struct nv30_context *nv30, uint32_t mask, boolean hwtnl);
+bool
+nv30_state_validate(struct nv30_context *nv30, uint32_t mask, bool hwtnl);
void
nv30_state_release(struct nv30_context *nv30);
@@ -213,7 +214,7 @@ nv30_state_release(struct nv30_context *nv30);
#define NV30_PRIM_GL_CASE(n) \
case PIPE_PRIM_##n: return NV30_3D_VERTEX_BEGIN_END_##n
-static INLINE unsigned
+static inline unsigned
nv30_prim_gl(unsigned prim)
{
switch (prim) {
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_draw.c b/src/gallium/drivers/nouveau/nv30/nv30_draw.c
index c1665b7ad2f..098d6e499fa 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_draw.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_draw.c
@@ -52,7 +52,7 @@ struct nv30_render {
uint32_t prim;
};
-static INLINE struct nv30_render *
+static inline struct nv30_render *
nv30_render(struct vbuf_render *render)
{
return (struct nv30_render *)render;
@@ -79,12 +79,12 @@ nv30_render_allocate_vertices(struct vbuf_render *render,
PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_STREAM,
render->max_vertex_buffer_bytes);
if (!r->buffer)
- return FALSE;
+ return false;
r->offset = 0;
}
- return TRUE;
+ return true;
}
static void *
@@ -134,7 +134,7 @@ nv30_render_draw_elements(struct vbuf_render *render,
NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, NV30_3D_VTXBUF_DMA1);
}
- if (!nv30_state_validate(nv30, ~0, FALSE))
+ if (!nv30_state_validate(nv30, ~0, false))
return;
BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
@@ -179,7 +179,7 @@ nv30_render_draw_arrays(struct vbuf_render *render, unsigned start, uint nr)
NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, NV30_3D_VTXBUF_DMA1);
}
- if (!nv30_state_validate(nv30, ~0, FALSE))
+ if (!nv30_state_validate(nv30, ~0, false))
return;
BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
@@ -221,7 +221,7 @@ static const struct {
[TGSI_SEMANTIC_TEXCOORD] = { EMIT_4F, INTERP_PERSPECTIVE, 8, 7, 0x00004000 },
};
-static boolean
+static bool
vroute_add(struct nv30_render *r, uint attrib, uint sem, uint *idx)
{
struct nv30_screen *screen = r->nv30->screen;
@@ -245,7 +245,7 @@ vroute_add(struct nv30_render *r, uint attrib, uint sem, uint *idx)
}
if (emit == EMIT_OMIT)
- return FALSE;
+ return false;
draw_emit_vertex_attr(vinfo, emit, vroute[sem].interp, attrib);
format = draw_translate_vinfo_format(emit);
@@ -272,10 +272,10 @@ vroute_add(struct nv30_render *r, uint attrib, uint sem, uint *idx)
assert(sem == TGSI_SEMANTIC_TEXCOORD);
*idx = 0x00001000 << (result - 8);
}
- return TRUE;
+ return true;
}
-static boolean
+static bool
nv30_render_validate(struct nv30_context *nv30)
{
struct nv30_render *r = nv30_render(nv30->draw->render);
@@ -300,7 +300,7 @@ nv30_render_validate(struct nv30_context *nv30)
}
if (nouveau_heap_alloc(heap, 16, &r->vertprog, &r->vertprog))
- return FALSE;
+ return false;
}
}
@@ -370,7 +370,7 @@ nv30_render_validate(struct nv30_context *nv30)
}
vinfo->size /= 4;
- return TRUE;
+ return true;
}
void
@@ -519,6 +519,6 @@ nv30_draw_init(struct pipe_context *pipe)
draw_set_rasterize_stage(draw, stage);
draw_wide_line_threshold(draw, 10000000.f);
draw_wide_point_threshold(draw, 10000000.f);
- draw_wide_point_sprites(draw, TRUE);
+ draw_wide_point_sprites(draw, true);
nv30->draw = draw;
}
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_format.h b/src/gallium/drivers/nouveau/nv30/nv30_format.h
index 8bf4a37299f..fa1e922fb65 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_format.h
+++ b/src/gallium/drivers/nouveau/nv30/nv30_format.h
@@ -27,28 +27,28 @@ struct nv30_texfmt {
};
extern const struct nv30_format_info nv30_format_info_table[];
-static INLINE const struct nv30_format_info *
+static inline const struct nv30_format_info *
nv30_format_info(struct pipe_screen *pscreen, enum pipe_format format)
{
return &nv30_format_info_table[format];
}
extern const struct nv30_format nv30_format_table[];
-static INLINE const struct nv30_format *
+static inline const struct nv30_format *
nv30_format(struct pipe_screen *pscreen, enum pipe_format format)
{
return &nv30_format_table[format];
}
extern const struct nv30_vtxfmt nv30_vtxfmt_table[];
-static INLINE const struct nv30_vtxfmt *
+static inline const struct nv30_vtxfmt *
nv30_vtxfmt(struct pipe_screen *pscreen, enum pipe_format format)
{
return &nv30_vtxfmt_table[format];
}
extern const struct nv30_texfmt nv30_texfmt_table[];
-static INLINE const struct nv30_texfmt *
+static inline const struct nv30_texfmt *
nv30_texfmt(struct pipe_screen *pscreen, enum pipe_format format)
{
return &nv30_texfmt_table[format];
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c b/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c
index 7f227868f73..6de61bcc1c0 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c
@@ -37,22 +37,26 @@ nv30_fragprog_upload(struct nv30_context *nv30)
struct nouveau_context *nv = &nv30->base;
struct nv30_fragprog *fp = nv30->fragprog.program;
struct pipe_context *pipe = &nv30->base.pipe;
- struct pipe_transfer *transfer;
- uint32_t *map;
- int i; (void)i;
- if (unlikely(!fp->buffer)) {
+ if (unlikely(!fp->buffer))
fp->buffer = pipe_buffer_create(pipe->screen, 0, 0, fp->insn_len * 4);
- }
- map = pipe_buffer_map(pipe, fp->buffer, PIPE_TRANSFER_WRITE, &transfer);
#ifndef PIPE_ARCH_BIG_ENDIAN
- memcpy(map, fp->insn, fp->insn_len * 4);
+ pipe_buffer_write(pipe, fp->buffer, 0, fp->insn_len * 4, fp->insn);
#else
- for (i = 0; i < fp->insn_len; i++)
- *map++ = (fp->insn[i] >> 16) | (fp->insn[i] << 16);
+ {
+ struct pipe_transfer *transfer;
+ uint32_t *map;
+ int i;
+
+ map = pipe_buffer_map(pipe, fp->buffer,
+ PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE,
+ &transfer);
+ for (i = 0; i < fp->insn_len; i++)
+ *map++ = (fp->insn[i] >> 16) | (fp->insn[i] << 16);
+ pipe_buffer_unmap(pipe, transfer);
+ }
#endif
- pipe_buffer_unmap(pipe, transfer);
if (nv04_resource(fp->buffer)->domain != NOUVEAU_BO_VRAM)
nouveau_buffer_migrate(nv, nv04_resource(fp->buffer), NOUVEAU_BO_VRAM);
@@ -64,7 +68,7 @@ nv30_fragprog_validate(struct nv30_context *nv30)
struct nouveau_pushbuf *push = nv30->base.pushbuf;
struct nouveau_object *eng3d = nv30->screen->eng3d;
struct nv30_fragprog *fp = nv30->fragprog.program;
- boolean upload = FALSE;
+ bool upload = false;
int i;
if (!fp->translated) {
@@ -72,7 +76,7 @@ nv30_fragprog_validate(struct nv30_context *nv30)
if (!fp->translated)
return;
- upload = TRUE;
+ upload = true;
}
/* update constants, also needs to be done on every fp switch as we
@@ -89,7 +93,7 @@ nv30_fragprog_validate(struct nv30_context *nv30)
if (!memcmp(&fp->insn[off], &cbuf[idx], 4 * 4))
continue;
memcpy(&fp->insn[off], &cbuf[idx], 4 * 4);
- upload = TRUE;
+ upload = true;
}
}
@@ -161,8 +165,15 @@ static void
nv30_fp_state_bind(struct pipe_context *pipe, void *hwcso)
{
struct nv30_context *nv30 = nv30_context(pipe);
+ struct nv30_fragprog *fp = hwcso;
+
+ /* reset the bufctx so that we don't keep a dangling reference to the fp
+ * code
+ */
+ if (fp != nv30->state.fragprog)
+ PUSH_RESET(nv30->base.pushbuf, BUFCTX_FRAGPROG);
- nv30->fragprog.program = hwcso;
+ nv30->fragprog.program = fp;
nv30->dirty |= NV30_NEW_FRAGPROG;
}
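
The fragprog upload rework above splits the two byte orders: little-endian hosts now write the instruction block in a single pipe_buffer_write(), while big-endian hosts still map the buffer and swap the 16-bit halves of every 32-bit word. The swap is a plain halfword rotation and is its own inverse; a standalone sketch:

#include <assert.h>
#include <stdint.h>

/* The big-endian path above rotates each 32-bit instruction word by
 * 16 bits, swapping its halfwords. */
static uint32_t swap_halves(uint32_t v)
{
   return (v >> 16) | (v << 16);
}

int main(void)
{
   assert(swap_halves(0x12345678) == 0x56781234);
   assert(swap_halves(swap_halves(0xdeadbeef)) == 0xdeadbeef); /* involution */
   return 0;
}
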
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
index 1a4b8929c0f..c75b4b95fd8 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
@@ -33,7 +33,7 @@
#include "nv30/nv30_resource.h"
#include "nv30/nv30_transfer.h"
-static INLINE unsigned
+static inline unsigned
layer_offset(struct pipe_resource *pt, unsigned level, unsigned layer)
{
struct nv30_miptree *mt = nv30_miptree(pt);
@@ -54,7 +54,7 @@ nv30_miptree_get_handle(struct pipe_screen *pscreen,
unsigned stride;
if (!mt || !mt->base.bo)
- return FALSE;
+ return false;
stride = mt->level[0].pitch;
@@ -78,13 +78,13 @@ struct nv30_transfer {
unsigned nblocksy;
};
-static INLINE struct nv30_transfer *
+static inline struct nv30_transfer *
nv30_transfer(struct pipe_transfer *ptx)
{
return (struct nv30_transfer *)ptx;
}
-static INLINE void
+static inline void
define_rect(struct pipe_resource *pt, unsigned level, unsigned z,
unsigned x, unsigned y, unsigned w, unsigned h,
struct nv30_rect *rect)
@@ -242,8 +242,8 @@ nv30_miptree_transfer_map(struct pipe_context *pipe, struct pipe_resource *pt,
tx->base.level = level;
tx->base.usage = usage;
tx->base.box = *box;
- tx->base.stride = util_format_get_nblocksx(pt->format, box->width) *
- util_format_get_blocksize(pt->format);
+ tx->base.stride = align(util_format_get_nblocksx(pt->format, box->width) *
+ util_format_get_blocksize(pt->format), 64);
tx->base.layer_stride = util_format_get_nblocksy(pt->format, box->height) *
tx->base.stride;
@@ -372,7 +372,7 @@ nv30_miptree_create(struct pipe_screen *pscreen,
}
if (!mt->uniform_pitch)
- mt->swizzled = TRUE;
+ mt->swizzled = true;
size = 0;
for (l = 0; l <= pt->last_level; l++) {
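
The transfer-map change above pads the staging stride to a 64-byte boundary, matching the NOUVEAU_MIN_BUFFER_MAP_ALIGN constant and the PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT value nv30 now reports. A sketch of that rounding; the width and cpp below are hypothetical:

#include <stdio.h>

/* Round a row's byte size up to a power-of-two alignment, as the
 * align() call in the hunk above does with 64. */
static unsigned align_to(unsigned value, unsigned a)
{
   return (value + a - 1) & ~(a - 1); /* a must be a power of two */
}

int main(void)
{
   unsigned width = 100, cpp = 4;  /* hypothetical RGBA8 row */
   printf("%u -> %u bytes\n", width * cpp, align_to(width * cpp, 64));
   return 0;                       /* prints 400 -> 448 */
}
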
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_push.c b/src/gallium/drivers/nouveau/nv30/nv30_push.c
index e0734fa70d3..67ab0508c17 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_push.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_push.c
@@ -47,12 +47,12 @@ struct push_context {
struct translate *translate;
- boolean primitive_restart;
+ bool primitive_restart;
uint32_t prim;
uint32_t restart_index;
};
-static INLINE unsigned
+static inline unsigned
prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index)
{
unsigned i;
@@ -62,7 +62,7 @@ prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index)
return i;
}
-static INLINE unsigned
+static inline unsigned
prim_restart_search_i16(uint16_t *elts, unsigned push, uint16_t index)
{
unsigned i;
@@ -72,7 +72,7 @@ prim_restart_search_i16(uint16_t *elts, unsigned push, uint16_t index)
return i;
}
-static INLINE unsigned
+static inline unsigned
prim_restart_search_i32(uint32_t *elts, unsigned push, uint32_t index)
{
unsigned i;
@@ -199,7 +199,7 @@ nv30_push_vbo(struct nv30_context *nv30, const struct pipe_draw_info *info)
{
struct push_context ctx;
unsigned i, index_size;
- boolean apply_bias = info->indexed && info->index_bias;
+ bool apply_bias = info->indexed && info->index_bias;
ctx.push = nv30->base.pushbuf;
ctx.translate = nv30->vertex->translate;
@@ -241,7 +241,7 @@ nv30_push_vbo(struct nv30_context *nv30, const struct pipe_draw_info *info)
} else {
ctx.idxbuf = NULL;
index_size = 0;
- ctx.primitive_restart = FALSE;
+ ctx.primitive_restart = false;
ctx.restart_index = 0;
}
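
The prim_restart_search_i08/i16/i32 helpers above let the push path split an index buffer at each restart index and submit the runs in between. A minimal sketch of the same scan for 16-bit indices:

#include <stdint.h>
#include <stdio.h>

/* Count leading indices until the restart value, so each run can be
 * pushed in one batch - same idea as prim_restart_search_i16 above. */
static unsigned find_restart_u16(const uint16_t *elts, unsigned n,
                                 uint16_t restart)
{
   unsigned i;
   for (i = 0; i < n && elts[i] != restart; ++i)
      ;
   return i;
}

int main(void)
{
   const uint16_t idx[] = { 0, 1, 2, 0xffff, 3, 4 };
   printf("run length: %u\n", find_restart_u16(idx, 6, 0xffff)); /* 3 */
   return 0;
}
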
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_query.c b/src/gallium/drivers/nouveau/nv30/nv30_query.c
index 516ee83168e..3980be9579a 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_query.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_query.c
@@ -98,7 +98,7 @@ struct nv30_query {
uint64_t result;
};
-static INLINE struct nv30_query *
+static inline struct nv30_query *
nv30_query(struct pipe_query *pipe)
{
return (struct nv30_query *)pipe;
@@ -208,7 +208,7 @@ nv30_query_result(struct pipe_context *pipe, struct pipe_query *pq,
if (ntfy1) {
while (ntfy1[3] & 0xff000000) {
if (!wait)
- return FALSE;
+ return false;
}
switch (q->type) {
@@ -228,7 +228,7 @@ nv30_query_result(struct pipe_context *pipe, struct pipe_query *pq,
}
*res64 = q->result;
- return TRUE;
+ return true;
}
static void
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_resource.c b/src/gallium/drivers/nouveau/nv30/nv30_resource.c
index 38fac8af898..a98a6464de8 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_resource.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_resource.c
@@ -42,12 +42,12 @@ nv30_memory_barrier(struct pipe_context *pipe, unsigned flags)
if (!nv30->vtxbuf[i].buffer)
continue;
if (nv30->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT)
- nv30->base.vbo_dirty = TRUE;
+ nv30->base.vbo_dirty = true;
}
if (nv30->idxbuf.buffer &&
nv30->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT)
- nv30->base.vbo_dirty = TRUE;
+ nv30->base.vbo_dirty = true;
}
}
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_resource.h b/src/gallium/drivers/nouveau/nv30/nv30_resource.h
index 1981c8d9ab9..8dac7795c9d 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_resource.h
+++ b/src/gallium/drivers/nouveau/nv30/nv30_resource.h
@@ -15,7 +15,7 @@ struct nv30_surface {
uint16_t depth;
};
-static INLINE struct nv30_surface *
+static inline struct nv30_surface *
nv30_surface(struct pipe_surface *ps)
{
return (struct nv30_surface *)ps;
@@ -32,13 +32,13 @@ struct nv30_miptree {
struct nv30_miptree_level level[13];
uint32_t uniform_pitch;
uint32_t layer_size;
- boolean swizzled;
+ bool swizzled;
unsigned ms_mode;
unsigned ms_x:1;
unsigned ms_y:1;
};
-static INLINE struct nv30_miptree *
+static inline struct nv30_miptree *
nv30_miptree(struct pipe_resource *pt)
{
return (struct nv30_miptree *)pt;
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index 2e38a1978ae..7aad26ba18b 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -69,6 +69,8 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return PIPE_ENDIAN_LITTLE;
case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
return 16;
+ case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
+ return NOUVEAU_MIN_BUFFER_MAP_ALIGN;
case PIPE_CAP_MAX_VIEWPORTS:
return 1;
case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
@@ -96,6 +98,9 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
return 1;
+ /* nv35 capabilities */
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
+ return eng3d->oclass == NV35_3D_CLASS || eng3d->oclass >= NV40_3D_CLASS;
/* nv4x capabilities */
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
case PIPE_CAP_NPOT_TEXTURES:
@@ -135,7 +140,6 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
case PIPE_CAP_START_INSTANCE:
case PIPE_CAP_TEXTURE_MULTISAMPLE:
- case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
@@ -162,6 +166,9 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
return 0;
case PIPE_CAP_VENDOR_ID:
@@ -313,12 +320,12 @@ nv30_screen_is_format_supported(struct pipe_screen *pscreen,
unsigned bindings)
{
if (sample_count > 4)
- return FALSE;
+ return false;
if (!(0x00000017 & (1 << sample_count)))
- return FALSE;
+ return false;
if (!util_format_is_supported(format, bindings)) {
- return FALSE;
+ return false;
}
/* transfers & shared are always supported */
@@ -656,6 +663,6 @@ nv30_screen_create(struct nouveau_device *dev)
nouveau_pushbuf_kick(push, push->channel);
- nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE);
+ nouveau_fence_new(&screen->base, &screen->base.fence.current, false);
return pscreen;
}
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.h b/src/gallium/drivers/nouveau/nv30/nv30_screen.h
index 3f2e47fec99..7b17b88097c 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.h
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.h
@@ -40,7 +40,7 @@ struct nv30_screen {
struct nouveau_heap *vp_data_heap;
};
-static INLINE struct nv30_screen *
+static inline struct nv30_screen *
nv30_screen(struct pipe_screen *pscreen)
{
return (struct nv30_screen *)pscreen;
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_state.c b/src/gallium/drivers/nouveau/nv30/nv30_state.c
index 708ba34c1e5..fd604c2266d 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_state.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_state.c
@@ -211,6 +211,7 @@ static void *
nv30_zsa_state_create(struct pipe_context *pipe,
const struct pipe_depth_stencil_alpha_state *cso)
{
+ struct nouveau_object *eng3d = nv30_context(pipe)->screen->eng3d;
struct nv30_zsa_stateobj *so;
so = CALLOC_STRUCT(nv30_zsa_stateobj);
@@ -223,6 +224,13 @@ nv30_zsa_state_create(struct pipe_context *pipe,
SB_DATA (so, cso->depth.writemask);
SB_DATA (so, cso->depth.enabled);
+ if (eng3d->oclass == NV35_3D_CLASS || eng3d->oclass >= NV40_3D_CLASS) {
+ SB_MTHD35(so, DEPTH_BOUNDS_TEST_ENABLE, 3);
+ SB_DATA (so, cso->depth.bounds_test);
+ SB_DATA (so, fui(cso->depth.bounds_min));
+ SB_DATA (so, fui(cso->depth.bounds_max));
+ }
+
if (cso->stencil[0].enabled) {
SB_MTHD30(so, STENCIL_ENABLE(0), 3);
SB_DATA (so, 1);
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_state.h b/src/gallium/drivers/nouveau/nv30/nv30_state.h
index e27e16fae82..ed3b8103a00 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_state.h
+++ b/src/gallium/drivers/nouveau/nv30/nv30_state.h
@@ -13,6 +13,8 @@
#define SB_DATA(so, u) (so)->data[(so)->size++] = (u)
#define SB_MTHD30(so, mthd, size) \
SB_DATA((so), ((size) << 18) | (7 << 13) | NV30_3D_##mthd)
+#define SB_MTHD35(so, mthd, size) \
+ SB_DATA((so), ((size) << 18) | (7 << 13) | NV35_3D_##mthd)
#define SB_MTHD40(so, mthd, size) \
SB_DATA((so), ((size) << 18) | (7 << 13) | NV40_3D_##mthd)
@@ -30,7 +32,7 @@ struct nv30_rasterizer_stateobj {
struct nv30_zsa_stateobj {
struct pipe_depth_stencil_alpha_state pipe;
- unsigned data[32];
+ unsigned data[36];
unsigned size;
};
@@ -80,7 +82,7 @@ struct nv30_vertprog {
struct tgsi_shader_info info;
struct draw_vertex_shader *draw;
- boolean translated;
+ bool translated;
unsigned enabled_ucps;
uint16_t texcoord[10];
@@ -109,7 +111,7 @@ struct nv30_fragprog {
struct tgsi_shader_info info;
struct draw_fragment_shader *draw;
- boolean translated;
+ bool translated;
uint32_t *insn;
unsigned insn_len;
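
The nv30_zsa_stateobj data[] array grows from 32 to 36 words because the nv35 depth-bounds path appends one SB_MTHD35 header plus three payload words (enable, fui(bounds_min), fui(bounds_max)). A sketch of the state-builder pattern the SB_* macros implement; the 0x0f00 method offset below is a hypothetical stand-in, not the real DEPTH_BOUNDS_TEST_ENABLE value:

#include <assert.h>
#include <stdint.h>

/* A state object is just a word array: each method header is
 * followed by its payload words. */
struct so { uint32_t data[36]; unsigned size; };

static void sb_data(struct so *s, uint32_t u) { s->data[s->size++] = u; }
static void sb_mthd(struct so *s, uint32_t mthd, unsigned n)
{
   sb_data(s, (n << 18) | (7 << 13) | mthd); /* subchannel 7, as above */
}

int main(void)
{
   struct so s = { .size = 0 };
   sb_mthd(&s, 0x0f00, 3);  /* hypothetical depth-bounds method */
   sb_data(&s, 1);          /* enable */
   sb_data(&s, 0x00000000); /* bounds_min as float bits (0.0f) */
   sb_data(&s, 0x3f800000); /* bounds_max as float bits (1.0f) */
   assert(s.size == 4);     /* the 4 extra words data[] grew by */
   return 0;
}
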
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c b/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c
index a954dcce562..8957634f0fa 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c
@@ -453,8 +453,8 @@ nv30_state_context_switch(struct nv30_context *nv30)
nv30->base.pushbuf->user_priv = &nv30->bufctx;
}
-boolean
-nv30_state_validate(struct nv30_context *nv30, uint32_t mask, boolean hwtnl)
+bool
+nv30_state_validate(struct nv30_context *nv30, uint32_t mask, bool hwtnl)
{
struct nouveau_screen *screen = &nv30->screen->base;
struct nouveau_pushbuf *push = nv30->base.pushbuf;
@@ -494,7 +494,7 @@ nv30_state_validate(struct nv30_context *nv30, uint32_t mask, boolean hwtnl)
nouveau_pushbuf_bufctx(push, bctx);
if (nouveau_pushbuf_validate(push)) {
nouveau_pushbuf_bufctx(push, NULL);
- return FALSE;
+ return false;
}
/*XXX*/
@@ -528,7 +528,7 @@ nv30_state_validate(struct nv30_context *nv30, uint32_t mask, boolean hwtnl)
}
}
- return TRUE;
+ return true;
}
void
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_texture.c b/src/gallium/drivers/nouveau/nv30/nv30_texture.c
index c3567217442..bfe21cceaa2 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_texture.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_texture.c
@@ -37,7 +37,7 @@
#define NV40_WRAP(n) \
case PIPE_TEX_WRAP_##n: ret = NV40_3D_TEX_WRAP_S_##n; break
-static INLINE unsigned
+static inline unsigned
wrap_mode(unsigned pipe)
{
unsigned ret = NV30_3D_TEX_WRAP_S_REPEAT;
@@ -58,7 +58,7 @@ wrap_mode(unsigned pipe)
return ret >> NV30_3D_TEX_WRAP_S__SHIFT;
}
-static INLINE unsigned
+static inline unsigned
filter_mode(const struct pipe_sampler_state *cso)
{
unsigned filter;
@@ -104,7 +104,7 @@ filter_mode(const struct pipe_sampler_state *cso)
return filter;
}
-static INLINE unsigned
+static inline unsigned
compare_mode(const struct pipe_sampler_state *cso)
{
if (cso->compare_mode != PIPE_TEX_COMPARE_R_TO_TEXTURE)
@@ -201,7 +201,7 @@ nv30_bind_sampler_states(struct pipe_context *pipe,
}
}
-static INLINE uint32_t
+static inline uint32_t
swizzle(const struct nv30_texfmt *fmt, unsigned cmp, unsigned swz)
{
uint32_t data = fmt->swz[swz].src << 8;
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_transfer.c b/src/gallium/drivers/nouveau/nv30/nv30_transfer.c
index 99bc0994ac2..214da6568c3 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_transfer.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_transfer.c
@@ -41,33 +41,33 @@
* of different ways.
*/
-static INLINE boolean
+static inline bool
nv30_transfer_scaled(struct nv30_rect *src, struct nv30_rect *dst)
{
if (src->x1 - src->x0 != dst->x1 - dst->x0)
- return TRUE;
+ return true;
if (src->y1 - src->y0 != dst->y1 - dst->y0)
- return TRUE;
- return FALSE;
+ return true;
+ return false;
}
-static INLINE boolean
+static inline bool
nv30_transfer_blit(XFER_ARGS)
{
if (nv30->screen->eng3d->oclass < NV40_3D_CLASS)
- return FALSE;
+ return false;
if (dst->offset & 63 || dst->pitch & 63 || dst->d > 1)
- return FALSE;
+ return false;
if (dst->w < 2 || dst->h < 2)
- return FALSE;
+ return false;
if (dst->cpp > 4 || (dst->cpp == 1 && !dst->pitch))
- return FALSE;
+ return false;
if (src->cpp > 4)
- return FALSE;
- return TRUE;
+ return false;
+ return true;
}
-static INLINE struct nouveau_heap *
+static inline struct nouveau_heap *
nv30_transfer_rect_vertprog(struct nv30_context *nv30)
{
struct nouveau_heap *heap = nv30->screen->vp_exec_heap;
@@ -108,7 +108,7 @@ nv30_transfer_rect_vertprog(struct nv30_context *nv30)
}
-static INLINE struct nv04_resource *
+static inline struct nv04_resource *
nv30_transfer_rect_fragprog(struct nv30_context *nv30)
{
struct nv04_resource *fp = nv04_resource(nv30->blit_fp);
@@ -368,29 +368,29 @@ nv30_transfer_rect_blit(XFER_ARGS)
PUSH_DATA (push, NV30_3D_VERTEX_BEGIN_END_STOP);
}
-static boolean
+static bool
nv30_transfer_sifm(XFER_ARGS)
{
if (!src->pitch || (src->w | src->h) > 1024 || src->w < 2 || src->h < 2)
- return FALSE;
+ return false;
if (src->d > 1 || dst->d > 1)
- return FALSE;
+ return false;
if (dst->offset & 63)
- return FALSE;
+ return false;
if (!dst->pitch) {
if ((dst->w | dst->h) > 2048 || dst->w < 2 || dst->h < 2)
- return FALSE;
+ return false;
} else {
if (dst->domain != NOUVEAU_BO_VRAM)
- return FALSE;
+ return false;
if (dst->pitch & 63)
- return FALSE;
+ return false;
}
- return TRUE;
+ return true;
}
static void
@@ -481,14 +481,14 @@ nv30_transfer_rect_sifm(XFER_ARGS)
* that name is still accurate on nv4x) error.
*/
-static boolean
+static bool
nv30_transfer_m2mf(XFER_ARGS)
{
if (!src->pitch || !dst->pitch)
- return FALSE;
+ return false;
if (nv30_transfer_scaled(src, dst))
- return FALSE;
- return TRUE;
+ return false;
+ return true;
}
static void
@@ -540,12 +540,12 @@ nv30_transfer_rect_m2mf(XFER_ARGS)
}
}
-static boolean
+static bool
nv30_transfer_cpu(XFER_ARGS)
{
if (nv30_transfer_scaled(src, dst))
- return FALSE;
- return TRUE;
+ return false;
+ return true;
}
static char *
@@ -554,7 +554,7 @@ linear_ptr(struct nv30_rect *rect, char *base, int x, int y, int z)
return base + (y * rect->pitch) + (x * rect->cpp);
}
-static INLINE unsigned
+static inline unsigned
swizzle2d(unsigned v, unsigned s)
{
v = (v | (v << 8)) & 0x00ff00ff;
@@ -614,7 +614,7 @@ swizzle3d_ptr(struct nv30_rect *rect, char *base, int x, int y, int z)
typedef char *(*get_ptr_t)(struct nv30_rect *, char *, int, int, int);
-static INLINE get_ptr_t
+static inline get_ptr_t
get_ptr(struct nv30_rect *rect)
{
if (rect->pitch)
@@ -653,7 +653,7 @@ nv30_transfer_rect(struct nv30_context *nv30, enum nv30_transfer_filter filter,
{
static const struct {
char *name;
- boolean (*possible)(XFER_ARGS);
+ bool (*possible)(XFER_ARGS);
void (*execute)(XFER_ARGS);
} *method, methods[] = {
{ "m2mf", nv30_transfer_m2mf, nv30_transfer_rect_m2mf },
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_vbo.c b/src/gallium/drivers/nouveau/nv30/nv30_vbo.c
index d4e384b21d2..8494549e9b1 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_vbo.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_vbo.c
@@ -79,7 +79,7 @@ nv30_emit_vtxattr(struct nv30_context *nv30, struct pipe_vertex_buffer *vb,
}
}
-static INLINE void
+static inline void
nv30_vbuf_range(struct nv30_context *nv30, int vbi,
uint32_t *base, uint32_t *size)
{
@@ -119,7 +119,7 @@ nv30_prevalidate_vbufs(struct nv30_context *nv30)
} else {
nouveau_buffer_migrate(&nv30->base, buf, NOUVEAU_BO_GART);
}
- nv30->base.vbo_dirty = TRUE;
+ nv30->base.vbo_dirty = true;
}
}
}
@@ -160,10 +160,10 @@ nv30_update_user_vbufs(struct nv30_context *nv30)
NOUVEAU_BO_LOW | NOUVEAU_BO_RD,
0, NV30_3D_VTXBUF_DMA1);
}
- nv30->base.vbo_dirty = TRUE;
+ nv30->base.vbo_dirty = true;
}
-static INLINE void
+static inline void
nv30_release_user_vbufs(struct nv30_context *nv30)
{
uint32_t vbo_user = nv30->vbo_user;
@@ -202,6 +202,9 @@ nv30_vbo_validate(struct nv30_context *nv30)
return;
redefine = MAX2(vertex->num_elements, nv30->state.num_vtxelts);
+ if (redefine == 0)
+ return;
+
BEGIN_NV04(push, NV30_3D(VTXFMT(0)), redefine);
for (i = 0; i < vertex->num_elements; i++) {
@@ -221,7 +224,7 @@ nv30_vbo_validate(struct nv30_context *nv30)
for (i = 0; i < vertex->num_elements; i++) {
struct nv04_resource *res;
unsigned offset;
- boolean user;
+ bool user;
ve = &vertex->pipe[i];
vb = &nv30->vtxbuf[ve->vertex_buffer_index];
@@ -254,14 +257,12 @@ nv30_vertex_state_create(struct pipe_context *pipe, unsigned num_elements,
struct translate_key transkey;
unsigned i;
- assert(num_elements);
-
so = MALLOC(sizeof(*so) + sizeof(*so->element) * num_elements);
if (!so)
return NULL;
memcpy(so->pipe, elements, sizeof(*elements) * num_elements);
so->num_elements = num_elements;
- so->need_conversion = FALSE;
+ so->need_conversion = false;
transkey.nr_elements = 0;
transkey.output_stride = 0;
@@ -284,7 +285,7 @@ nv30_vertex_state_create(struct pipe_context *pipe, unsigned num_elements,
return NULL;
}
so->element[i].state = nv30_vtxfmt(pipe->screen, fmt)->hw;
- so->need_conversion = TRUE;
+ so->need_conversion = true;
}
if (1) {
@@ -452,7 +453,7 @@ nv30_draw_elements_inline_u32_short(struct nouveau_pushbuf *push,
}
static void
-nv30_draw_elements(struct nv30_context *nv30, boolean shorten,
+nv30_draw_elements(struct nv30_context *nv30, bool shorten,
unsigned mode, unsigned start, unsigned count,
unsigned instance_count, int32_t index_bias)
{
@@ -461,13 +462,11 @@ nv30_draw_elements(struct nv30_context *nv30, boolean shorten,
struct nouveau_object *eng3d = nv30->screen->eng3d;
unsigned prim = nv30_prim_gl(mode);
-#if 0 /*XXX*/
- if (index_bias != nv30->state.index_bias) {
- BEGIN_NV04(push, NV30_3D(VB_ELEMENT_BASE), 1);
+ if (eng3d->oclass >= NV40_3D_CLASS && index_bias != nv30->state.index_bias) {
+ BEGIN_NV04(push, NV40_3D(VB_ELEMENT_BASE), 1);
PUSH_DATA (push, index_bias);
nv30->state.index_bias = index_bias;
}
-#endif
if (eng3d->oclass == NV40_3D_CLASS && index_size > 1 &&
nv30->idxbuf.buffer) {
@@ -564,7 +563,7 @@ nv30_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
if (nv30->vbo_user && !(nv30->dirty & (NV30_NEW_VERTEX | NV30_NEW_ARRAYS)))
nv30_update_user_vbufs(nv30);
- nv30_state_validate(nv30, ~0, TRUE);
+ nv30_state_validate(nv30, ~0, true);
if (nv30->draw_flags) {
nv30_render_vbo(pipe, info);
return;
@@ -578,17 +577,17 @@ nv30_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
if (!nv30->vtxbuf[i].buffer)
continue;
if (nv30->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
- nv30->base.vbo_dirty = TRUE;
+ nv30->base.vbo_dirty = true;
}
if (!nv30->base.vbo_dirty && nv30->idxbuf.buffer &&
nv30->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
- nv30->base.vbo_dirty = TRUE;
+ nv30->base.vbo_dirty = true;
if (nv30->base.vbo_dirty) {
BEGIN_NV04(push, NV30_3D(VTX_CACHE_INVALIDATE_1710), 1);
PUSH_DATA (push, 0);
- nv30->base.vbo_dirty = FALSE;
+ nv30->base.vbo_dirty = false;
}
if (!info->indexed) {
@@ -596,7 +595,7 @@ nv30_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
info->mode, info->start, info->count,
info->instance_count);
} else {
- boolean shorten = info->max_index <= 65535;
+ bool shorten = info->max_index <= 65535;
if (info->primitive_restart != nv30->state.prim_restart) {
if (info->primitive_restart) {
@@ -605,7 +604,7 @@ nv30_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
PUSH_DATA (push, info->restart_index);
if (info->restart_index > 65535)
- shorten = FALSE;
+ shorten = false;
} else {
BEGIN_NV04(push, NV40_3D(PRIM_RESTART_ENABLE), 1);
PUSH_DATA (push, 0);
@@ -617,7 +616,7 @@ nv30_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
PUSH_DATA (push, info->restart_index);
if (info->restart_index > 65535)
- shorten = FALSE;
+ shorten = false;
}
nv30_draw_elements(nv30, shorten,
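
The nv30_vbo.c hunk above revives the previously #if 0'd index-bias path: on NV40-class hardware the bias is programmed through the newly documented NV40_3D_VB_ELEMENT_BASE register, and nv30->state.index_bias caches the last value so the method is only emitted on change. A sketch of that lazy-update pattern, with printf standing in for the pushbuf write:

#include <stdio.h>

struct state { int index_bias; };

/* Only emit the method when the cached value actually changes. */
static void set_index_bias(struct state *st, int bias)
{
   if (bias == st->index_bias)
      return;                       /* redundant, skip the write */
   printf("emit VB_ELEMENT_BASE = %d\n", bias);
   st->index_bias = bias;
}

int main(void)
{
   struct state st = { .index_bias = 0 };
   set_index_bias(&st, 4);  /* emits */
   set_index_bias(&st, 4);  /* elided */
   set_index_bias(&st, 0);  /* emits */
   return 0;
}
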
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_vertprog.c b/src/gallium/drivers/nouveau/nv30/nv30_vertprog.c
index 4d4145d10b5..ee0a6280d7a 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_vertprog.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_vertprog.c
@@ -48,7 +48,7 @@ nv30_vertprog_destroy(struct nv30_vertprog *vp)
vp->consts = NULL;
vp->nr_consts = 0;
- vp->translated = FALSE;
+ vp->translated = false;
}
void
@@ -58,8 +58,8 @@ nv30_vertprog_validate(struct nv30_context *nv30)
struct nouveau_object *eng3d = nv30->screen->eng3d;
struct nv30_vertprog *vp = nv30->vertprog.program;
struct nv30_fragprog *fp = nv30->fragprog.program;
- boolean upload_code = FALSE;
- boolean upload_data = FALSE;
+ bool upload_code = false;
+ bool upload_data = false;
unsigned i;
if (nv30->dirty & NV30_NEW_FRAGPROG) {
@@ -125,7 +125,7 @@ nv30_vertprog_validate(struct nv30_context *nv30)
}
}
- upload_code = TRUE;
+ upload_code = true;
}
if (vp->nr_consts && !vp->data) {
@@ -166,8 +166,8 @@ nv30_vertprog_validate(struct nv30_context *nv30)
}
}
- upload_code = TRUE;
- upload_data = TRUE;
+ upload_code = true;
+ upload_data = true;
}
if (vp->nr_consts) {
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_winsys.h b/src/gallium/drivers/nouveau/nv30/nv30_winsys.h
index 5cee5df60ce..2324b517c44 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_winsys.h
+++ b/src/gallium/drivers/nouveau/nv30/nv30_winsys.h
@@ -19,34 +19,34 @@
#define NV40_3D_PRIM_RESTART_ENABLE 0x1dac
#define NV40_3D_PRIM_RESTART_INDEX 0x1db0
-static INLINE void
+static inline void
PUSH_RELOC(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t offset,
uint32_t flags, uint32_t vor, uint32_t tor)
{
nouveau_pushbuf_reloc(push, bo, offset, flags, vor, tor);
}
-static INLINE struct nouveau_bufctx *
+static inline struct nouveau_bufctx *
bufctx(struct nouveau_pushbuf *push)
{
struct nouveau_bufctx **pctx = push->user_priv;
return *pctx;
}
-static INLINE void
+static inline void
PUSH_RESET(struct nouveau_pushbuf *push, int bin)
{
nouveau_bufctx_reset(bufctx(push), bin);
}
-static INLINE void
+static inline void
PUSH_REFN(struct nouveau_pushbuf *push, int bin,
struct nouveau_bo *bo, uint32_t access)
{
nouveau_bufctx_refn(bufctx(push), bin, bo, access);
}
-static INLINE void
+static inline void
PUSH_MTHDl(struct nouveau_pushbuf *push, int subc, int mthd, int bin,
struct nouveau_bo *bo, uint32_t offset, uint32_t access)
{
@@ -55,7 +55,7 @@ PUSH_MTHDl(struct nouveau_pushbuf *push, int subc, int mthd, int bin,
PUSH_DATA(push, bo->offset + offset);
}
-static INLINE void
+static inline void
PUSH_MTHDo(struct nouveau_pushbuf *push, int subc, int mthd, int bin,
struct nouveau_bo *bo, uint32_t access, uint32_t vor, uint32_t tor)
{
@@ -67,7 +67,7 @@ PUSH_MTHDo(struct nouveau_pushbuf *push, int subc, int mthd, int bin,
PUSH_DATA(push, tor);
}
-static INLINE void
+static inline void
PUSH_MTHDs(struct nouveau_pushbuf *push, int subc, int mthd, int bin,
struct nouveau_bo *bo, uint32_t data, uint32_t access,
uint32_t vor, uint32_t tor)
@@ -80,7 +80,7 @@ PUSH_MTHDs(struct nouveau_pushbuf *push, int subc, int mthd, int bin,
PUSH_DATA(push, data | tor);
}
-static INLINE struct nouveau_bufref *
+static inline struct nouveau_bufref *
PUSH_MTHD(struct nouveau_pushbuf *push, int subc, int mthd, int bin,
struct nouveau_bo *bo, uint32_t data, uint32_t access,
uint32_t vor, uint32_t tor)
@@ -99,7 +99,7 @@ PUSH_MTHD(struct nouveau_pushbuf *push, int subc, int mthd, int bin,
return bref;
}
-static INLINE void
+static inline void
PUSH_RESRC(struct nouveau_pushbuf *push, int subc, int mthd, int bin,
struct nv04_resource *r, uint32_t data, uint32_t access,
uint32_t vor, uint32_t tor)
@@ -108,14 +108,14 @@ PUSH_RESRC(struct nouveau_pushbuf *push, int subc, int mthd, int bin,
r->domain | access, vor, tor)->priv = r;
}
-static INLINE void
+static inline void
BEGIN_NV04(struct nouveau_pushbuf *push, int subc, int mthd, int size)
{
PUSH_SPACE(push, size + 1);
PUSH_DATA (push, 0x00000000 | (size << 18) | (subc << 13) | mthd);
}
-static INLINE void
+static inline void
BEGIN_NI04(struct nouveau_pushbuf *push, int subc, int mthd, int size)
{
PUSH_SPACE(push, size + 1);
diff --git a/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c b/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c
index 9ef16965f39..e68d23e5587 100644
--- a/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c
+++ b/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c
@@ -44,7 +44,7 @@ struct nvfx_fpc {
struct util_dynarray label_relocs;
};
-static INLINE struct nvfx_reg
+static inline struct nvfx_reg
temp(struct nvfx_fpc *fpc)
{
int idx = __builtin_ctzll(~fpc->r_temps);
@@ -60,7 +60,7 @@ temp(struct nvfx_fpc *fpc)
return nvfx_reg(NVFXSR_TEMP, idx);
}
-static INLINE void
+static inline void
release_temps(struct nvfx_fpc *fpc)
{
fpc->r_temps &= ~fpc->r_temps_discard;
@@ -373,7 +373,7 @@ nv40_fp_brk(struct nvfx_fpc *fpc)
hw[3] = 0;
}
-static INLINE struct nvfx_src
+static inline struct nvfx_src
tgsi_src(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc)
{
struct nvfx_src src;
@@ -415,7 +415,7 @@ tgsi_src(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc)
return src;
}
-static INLINE struct nvfx_reg
+static inline struct nvfx_reg
tgsi_dst(struct nvfx_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
switch (fdst->Register.File) {
case TGSI_FILE_OUTPUT:
@@ -430,7 +430,7 @@ tgsi_dst(struct nvfx_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
}
}
-static INLINE int
+static inline int
tgsi_mask(uint tgsi)
{
int mask = 0;
@@ -442,7 +442,7 @@ tgsi_mask(uint tgsi)
return mask;
}
-static boolean
+static bool
nvfx_fragprog_parse_instruction(struct nvfx_fpc *fpc,
const struct tgsi_full_instruction *finst)
{
@@ -455,7 +455,7 @@ nvfx_fragprog_parse_instruction(struct nvfx_fpc *fpc,
int i;
if (finst->Instruction.Opcode == TGSI_OPCODE_END)
- return TRUE;
+ return true;
for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
const struct tgsi_full_src_register *fsrc;
@@ -525,7 +525,7 @@ nvfx_fragprog_parse_instruction(struct nvfx_fpc *fpc,
break;
default:
NOUVEAU_ERR("bad src file\n");
- return FALSE;
+ return false;
}
}
@@ -868,12 +868,12 @@ nvfx_fragprog_parse_instruction(struct nvfx_fpc *fpc,
default:
NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
- return FALSE;
+ return false;
}
out:
release_temps(fpc);
- return TRUE;
+ return true;
nv3x_cflow:
{
static int warned = 0;
@@ -887,7 +887,7 @@ nv3x_cflow:
goto out;
}
-static boolean
+static bool
nvfx_fragprog_parse_decl_input(struct nvfx_fpc *fpc,
const struct tgsi_full_declaration *fdec)
{
@@ -917,17 +917,17 @@ nvfx_fragprog_parse_decl_input(struct nvfx_fpc *fpc,
case TGSI_SEMANTIC_GENERIC:
case TGSI_SEMANTIC_PCOORD:
/* will be assigned to remaining TC slots later */
- return TRUE;
+ return true;
default:
assert(0);
- return FALSE;
+ return false;
}
fpc->r_input[idx] = nvfx_reg(NVFXSR_INPUT, hw);
- return TRUE;
+ return true;
}
-static boolean
+static bool
nvfx_fragprog_assign_generic(struct nvfx_fpc *fpc,
const struct tgsi_full_declaration *fdec)
{
@@ -954,16 +954,16 @@ nvfx_fragprog_assign_generic(struct nvfx_fpc *fpc,
}
hw = NVFX_FP_OP_INPUT_SRC_TC(hw);
fpc->r_input[idx] = nvfx_reg(NVFXSR_INPUT, hw);
- return TRUE;
+ return true;
}
}
- return FALSE;
+ return false;
default:
- return TRUE;
+ return true;
}
}
-static boolean
+static bool
nvfx_fragprog_parse_decl_output(struct nvfx_fpc *fpc,
const struct tgsi_full_declaration *fdec)
{
@@ -984,20 +984,20 @@ nvfx_fragprog_parse_decl_output(struct nvfx_fpc *fpc,
}
if(hw > ((fpc->is_nv4x) ? 4 : 2)) {
NOUVEAU_ERR("bad rcol index\n");
- return FALSE;
+ return false;
}
break;
default:
NOUVEAU_ERR("bad output semantic\n");
- return FALSE;
+ return false;
}
fpc->r_result[idx] = nvfx_reg(NVFXSR_OUTPUT, hw);
fpc->r_temps |= (1ULL << hw);
- return TRUE;
+ return true;
}
-static boolean
+static bool
nvfx_fragprog_prepare(struct nvfx_fpc *fpc)
{
struct tgsi_parse_context p;
@@ -1081,17 +1081,17 @@ nvfx_fragprog_prepare(struct nvfx_fpc *fpc)
fpc->r_temps_discard = 0ULL;
}
- return TRUE;
+ return true;
out_err:
FREE(fpc->r_temp);
fpc->r_temp = NULL;
tgsi_parse_free(&p);
- return FALSE;
+ return false;
}
-DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_fp, "NVFX_DUMP_FP", FALSE)
+DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_fp, "NVFX_DUMP_FP", false)
void
_nvfx_fragprog_translate(uint16_t oclass, struct nv30_fragprog *fp)
@@ -1100,7 +1100,7 @@ _nvfx_fragprog_translate(uint16_t oclass, struct nv30_fragprog *fp)
struct nvfx_fpc *fpc = NULL;
struct util_dynarray insns;
- fp->translated = FALSE;
+ fp->translated = false;
fp->point_sprite_control = 0;
fp->vp_or = 0;
@@ -1182,7 +1182,7 @@ _nvfx_fragprog_translate(uint16_t oclass, struct nv30_fragprog *fp)
debug_printf("\n");
}
- fp->translated = TRUE;
+ fp->translated = true;
out:
tgsi_parse_free(&parse);
diff --git a/src/gallium/drivers/nouveau/nv30/nvfx_shader.h b/src/gallium/drivers/nouveau/nv30/nvfx_shader.h
index 9538a793d7e..e66d8af7620 100644
--- a/src/gallium/drivers/nouveau/nv30/nvfx_shader.h
+++ b/src/gallium/drivers/nouveau/nv30/nvfx_shader.h
@@ -448,8 +448,8 @@ struct nvfx_insn
struct nvfx_src src[3];
};
-static INLINE struct nvfx_insn
-nvfx_insn(boolean sat, unsigned op, int unit, struct nvfx_reg dst, unsigned mask, struct nvfx_src s0, struct nvfx_src s1, struct nvfx_src s2)
+static inline struct nvfx_insn
+nvfx_insn(bool sat, unsigned op, int unit, struct nvfx_reg dst, unsigned mask, struct nvfx_src s0, struct nvfx_src s1, struct nvfx_src s2)
{
struct nvfx_insn insn = {
.op = op,
@@ -468,7 +468,7 @@ nvfx_insn(boolean sat, unsigned op, int unit, struct nvfx_reg dst, unsigned mask
return insn;
}
-static INLINE struct nvfx_reg
+static inline struct nvfx_reg
nvfx_reg(int type, int index)
{
struct nvfx_reg temp = {
@@ -478,7 +478,7 @@ nvfx_reg(int type, int index)
return temp;
}
-static INLINE struct nvfx_src
+static inline struct nvfx_src
nvfx_src(struct nvfx_reg reg)
{
struct nvfx_src temp = {
@@ -491,7 +491,7 @@ nvfx_src(struct nvfx_reg reg)
return temp;
}
-static INLINE struct nvfx_src
+static inline struct nvfx_src
nvfx_src_swz(struct nvfx_src src, int x, int y, int z, int w)
{
struct nvfx_src dst = src;
@@ -503,14 +503,14 @@ nvfx_src_swz(struct nvfx_src src, int x, int y, int z, int w)
return dst;
}
-static INLINE struct nvfx_src
+static inline struct nvfx_src
nvfx_src_neg(struct nvfx_src src)
{
src.negate = !src.negate;
return src;
}
-static INLINE struct nvfx_src
+static inline struct nvfx_src
nvfx_src_abs(struct nvfx_src src)
{
src.abs = 1;
@@ -529,7 +529,7 @@ struct nv30_vertprog;
void
_nvfx_fragprog_translate(uint16_t oclass, struct nv30_fragprog *fp);
-boolean
+bool
_nvfx_vertprog_translate(uint16_t oclass, struct nv30_vertprog *vp);
#endif
diff --git a/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c b/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c
index 1ce0589be71..5757eb1fb16 100644
--- a/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c
+++ b/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c
@@ -416,7 +416,7 @@ tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
return src;
}
-static INLINE struct nvfx_reg
+static inline struct nvfx_reg
tgsi_dst(struct nvfx_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
struct nvfx_reg dst;
@@ -455,7 +455,7 @@ tgsi_mask(uint tgsi)
return mask;
}
-static boolean
+static bool
nvfx_vertprog_parse_instruction(struct nvfx_vpc *vpc,
unsigned idx, const struct tgsi_full_instruction *finst)
{
@@ -466,7 +466,7 @@ nvfx_vertprog_parse_instruction(struct nvfx_vpc *vpc,
struct nvfx_insn insn;
struct nvfx_relocation reloc;
struct nvfx_loop_entry loop;
- boolean sat = FALSE;
+ bool sat = false;
int mask;
int ai = -1, ci = -1, ii = -1;
int i;
@@ -524,25 +524,25 @@ nvfx_vertprog_parse_instruction(struct nvfx_vpc *vpc,
break;
default:
NOUVEAU_ERR("bad src file\n");
- return FALSE;
+ return false;
}
}
for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
if(src[i].reg.type < 0)
- return FALSE;
+ return false;
}
if(finst->Dst[0].Register.File == TGSI_FILE_ADDRESS &&
finst->Instruction.Opcode != TGSI_OPCODE_ARL)
- return FALSE;
+ return false;
final_dst = dst = tgsi_dst(vpc, &finst->Dst[0]);
mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
if(finst->Instruction.Saturate) {
assert(finst->Instruction.Opcode != TGSI_OPCODE_ARL);
if (vpc->is_nv4x)
- sat = TRUE;
+ sat = true;
else
if(dst.type != NVFXSR_TEMP)
dst = temp(vpc);
@@ -793,7 +793,7 @@ nvfx_vertprog_parse_instruction(struct nvfx_vpc *vpc,
break;
default:
NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
- return FALSE;
+ return false;
}
if(finst->Instruction.Saturate && !vpc->is_nv4x) {
@@ -804,10 +804,10 @@ nvfx_vertprog_parse_instruction(struct nvfx_vpc *vpc,
}
release_temps(vpc);
- return TRUE;
+ return true;
}
-static boolean
+static bool
nvfx_vertprog_parse_decl_output(struct nvfx_vpc *vpc,
const struct tgsi_full_declaration *fdec)
{
@@ -825,7 +825,7 @@ nvfx_vertprog_parse_decl_output(struct nvfx_vpc *vpc,
vpc->r_result[idx] = temp(vpc);
vpc->r_temps_discard = 0;
vpc->cvtx_idx = idx;
- return TRUE;
+ return true;
case TGSI_SEMANTIC_COLOR:
if (fdec->Semantic.Index == 0) {
hw = NVFX_VP(INST_DEST_COL0);
@@ -834,7 +834,7 @@ nvfx_vertprog_parse_decl_output(struct nvfx_vpc *vpc,
hw = NVFX_VP(INST_DEST_COL1);
} else {
NOUVEAU_ERR("bad colour semantic index\n");
- return FALSE;
+ return false;
}
break;
case TGSI_SEMANTIC_BCOLOR:
@@ -845,7 +845,7 @@ nvfx_vertprog_parse_decl_output(struct nvfx_vpc *vpc,
hw = NVFX_VP(INST_DEST_BFC1);
} else {
NOUVEAU_ERR("bad bcolour semantic index\n");
- return FALSE;
+ return false;
}
break;
case TGSI_SEMANTIC_FOG:
@@ -868,22 +868,22 @@ nvfx_vertprog_parse_decl_output(struct nvfx_vpc *vpc,
if (i == num_texcoords) {
vpc->r_result[idx] = nvfx_reg(NVFXSR_NONE, 0);
- return TRUE;
+ return true;
}
break;
case TGSI_SEMANTIC_EDGEFLAG:
vpc->r_result[idx] = nvfx_reg(NVFXSR_NONE, 0);
- return TRUE;
+ return true;
default:
NOUVEAU_ERR("bad output semantic\n");
- return FALSE;
+ return false;
}
vpc->r_result[idx] = nvfx_reg(NVFXSR_OUTPUT, hw);
- return TRUE;
+ return true;
}
-static boolean
+static bool
nvfx_vertprog_prepare(struct nvfx_vpc *vpc)
{
struct tgsi_parse_context p;
@@ -924,7 +924,7 @@ nvfx_vertprog_prepare(struct nvfx_vpc *vpc)
break;
case TGSI_FILE_OUTPUT:
if (!nvfx_vertprog_parse_decl_output(vpc, fdec))
- return FALSE;
+ return false;
break;
default:
break;
@@ -961,12 +961,12 @@ nvfx_vertprog_prepare(struct nvfx_vpc *vpc)
}
vpc->r_temps_discard = 0;
- return TRUE;
+ return true;
}
-DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_vp, "NVFX_DUMP_VP", FALSE)
+DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_vp, "NVFX_DUMP_VP", false)
-boolean
+bool
_nvfx_vertprog_translate(uint16_t oclass, struct nv30_vertprog *vp)
{
struct tgsi_parse_context parse;
@@ -975,13 +975,13 @@ _nvfx_vertprog_translate(uint16_t oclass, struct nv30_vertprog *vp)
struct util_dynarray insns;
int i, ucps;
- vp->translated = FALSE;
+ vp->translated = false;
vp->nr_insns = 0;
vp->nr_consts = 0;
vpc = CALLOC_STRUCT(nvfx_vpc);
if (!vpc)
- return FALSE;
+ return false;
vpc->is_nv4x = (oclass >= NV40_3D_CLASS) ? ~0 : 0;
vpc->vp = vp;
vpc->pipe = vp->pipe;
@@ -990,7 +990,7 @@ _nvfx_vertprog_translate(uint16_t oclass, struct nv30_vertprog *vp)
if (!nvfx_vertprog_prepare(vpc)) {
FREE(vpc);
- return FALSE;
+ return false;
}
/* Redirect post-transform vertex position to a temp if user clip
@@ -1108,7 +1108,7 @@ _nvfx_vertprog_translate(uint16_t oclass, struct nv30_vertprog *vp)
debug_printf("\n");
}
- vp->translated = TRUE;
+ vp->translated = true;
out:
tgsi_parse_free(&parse);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_blit.h b/src/gallium/drivers/nouveau/nv50/nv50_blit.h
index 756c4c11bf6..0ccec568d3a 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_blit.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_blit.h
@@ -37,7 +37,7 @@ nv50_resource_resolve(struct pipe_context *, const struct pipe_resolve_info *);
#define NV50_BLIT_TEXTURE_2D_ARRAY 5
#define NV50_BLIT_MAX_TEXTURE_TYPES 6
-static INLINE unsigned
+static inline unsigned
nv50_blit_texture_type(enum pipe_texture_target target)
{
switch (target) {
@@ -52,7 +52,7 @@ nv50_blit_texture_type(enum pipe_texture_target target)
}
}
-static INLINE unsigned
+static inline unsigned
nv50_blit_get_tgsi_texture_target(enum pipe_texture_target target)
{
switch (target) {
@@ -67,7 +67,7 @@ nv50_blit_get_tgsi_texture_target(enum pipe_texture_target target)
}
}
-static INLINE enum pipe_texture_target
+static inline enum pipe_texture_target
nv50_blit_reinterpret_pipe_texture_target(enum pipe_texture_target target)
{
switch (target) {
@@ -81,7 +81,7 @@ nv50_blit_reinterpret_pipe_texture_target(enum pipe_texture_target target)
}
}
-static INLINE unsigned
+static inline unsigned
nv50_blit_get_filter(const struct pipe_blit_info *info)
{
if (info->dst.resource->nr_samples < info->src.resource->nr_samples)
@@ -102,7 +102,7 @@ nv50_blit_get_filter(const struct pipe_blit_info *info)
/* Since shaders cannot export stencil, we cannot copy stencil values when
* rendering to ZETA, so we attach the ZS surface to a colour render target.
*/
-static INLINE enum pipe_format
+static inline enum pipe_format
nv50_blit_zeta_to_colour_format(enum pipe_format format)
{
switch (format) {
@@ -127,7 +127,7 @@ nv50_blit_zeta_to_colour_format(enum pipe_format format)
}
-static INLINE uint16_t
+static inline uint16_t
nv50_blit_derive_color_mask(const struct pipe_blit_info *info)
{
const unsigned mask = info->mask;
@@ -162,7 +162,7 @@ nv50_blit_derive_color_mask(const struct pipe_blit_info *info)
return color_mask;
}
-static INLINE uint32_t
+static inline uint32_t
nv50_blit_eng2d_get_mask(const struct pipe_blit_info *info)
{
uint32_t mask = 0;
@@ -191,8 +191,8 @@ nv50_blit_eng2d_get_mask(const struct pipe_blit_info *info)
# define nv50_format_table nvc0_format_table
#endif
-/* return TRUE for formats that can be converted among each other by NVC0_2D */
-static INLINE boolean
+/* return true for formats that can be converted among each other by NVC0_2D */
+static inline bool
nv50_2d_dst_format_faithful(enum pipe_format format)
{
const uint64_t mask =
@@ -201,7 +201,7 @@ nv50_2d_dst_format_faithful(enum pipe_format format)
uint8_t id = nv50_format_table[format].rt;
return (id >= 0xc0) && (mask & (1ULL << (id - 0xc0)));
}
-static INLINE boolean
+static inline bool
nv50_2d_src_format_faithful(enum pipe_format format)
{
const uint64_t mask =
@@ -211,7 +211,7 @@ nv50_2d_src_format_faithful(enum pipe_format format)
return (id >= 0xc0) && (mask & (1ULL << (id - 0xc0)));
}
-static INLINE boolean
+static inline bool
nv50_2d_format_supported(enum pipe_format format)
{
uint8_t id = nv50_format_table[format].rt;
@@ -219,7 +219,7 @@ nv50_2d_format_supported(enum pipe_format format)
(NV50_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0)));
}
-static INLINE boolean
+static inline bool
nv50_2d_dst_format_ops_supported(enum pipe_format format)
{
uint8_t id = nv50_format_table[format].rt;
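
The *_faithful/_supported predicates above map a hardware format id into a 64-entry window starting at 0xc0 and test membership in a 64-bit mask. A standalone sketch of that test, with a hypothetical mask:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Format ids at or above a base index into a 64-bit capability mask. */
static bool id_in_mask(uint8_t id, uint8_t base, uint64_t mask)
{
   return id >= base && (mask & (1ULL << (id - base)));
}

int main(void)
{
   const uint64_t supported = (1ULL << 0) | (1ULL << 5); /* hypothetical */
   assert(id_in_mask(0xc0, 0xc0, supported));
   assert(id_in_mask(0xc5, 0xc0, supported));
   assert(!id_in_mask(0xc1, 0xc0, supported));
   assert(!id_in_mask(0x80, 0xc0, supported)); /* below the window */
   return 0;
}
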
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c
index 5b5d3912c20..f8d46db7c67 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c
@@ -64,12 +64,12 @@ nv50_memory_barrier(struct pipe_context *pipe, unsigned flags)
if (!nv50->vtxbuf[i].buffer)
continue;
if (nv50->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT)
- nv50->base.vbo_dirty = TRUE;
+ nv50->base.vbo_dirty = true;
}
if (nv50->idxbuf.buffer &&
nv50->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT)
- nv50->base.vbo_dirty = TRUE;
+ nv50->base.vbo_dirty = true;
for (s = 0; s < 3 && !nv50->cb_dirty; ++s) {
uint32_t valid = nv50->constbuf_valid[s];
@@ -87,7 +87,7 @@ nv50_memory_barrier(struct pipe_context *pipe, unsigned flags)
continue;
if (res->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT)
- nv50->cb_dirty = TRUE;
+ nv50->cb_dirty = true;
}
}
}
@@ -100,9 +100,9 @@ nv50_default_kick_notify(struct nouveau_pushbuf *push)
if (screen) {
nouveau_fence_next(&screen->base);
- nouveau_fence_update(&screen->base, TRUE);
+ nouveau_fence_update(&screen->base, true);
if (screen->cur_ctx)
- screen->cur_ctx->state.flushed = TRUE;
+ screen->cur_ctx->state.flushed = true;
}
}
@@ -310,7 +310,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv)
nv50->base.invalidate_resource_storage = nv50_invalidate_resource_storage;
if (screen->base.device->chipset < 0x84 ||
- debug_get_bool_option("NOUVEAU_PMPEG", FALSE)) {
+ debug_get_bool_option("NOUVEAU_PMPEG", false)) {
/* PMPEG */
nouveau_context_init_vdec(&nv50->base);
} else if (screen->base.device->chipset < 0x98 ||
@@ -351,7 +351,7 @@ out_err:
}
void
-nv50_bufctx_fence(struct nouveau_bufctx *bufctx, boolean on_flush)
+nv50_bufctx_fence(struct nouveau_bufctx *bufctx, bool on_flush)
{
struct nouveau_list *list = on_flush ? &bufctx->current : &bufctx->pending;
struct nouveau_list *it;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h
index 1f123ef7e92..ce12e714774 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -91,7 +91,7 @@
struct nv50_blitctx;
-boolean nv50_blitctx_create(struct nv50_context *);
+bool nv50_blitctx_create(struct nv50_context *);
struct nv50_context {
struct nouveau_context base;
@@ -102,7 +102,7 @@ struct nv50_context {
struct nouveau_bufctx *bufctx;
uint32_t dirty;
- boolean cb_dirty;
+ bool cb_dirty;
struct nv50_graph_state state;
@@ -152,26 +152,26 @@ struct nv50_context {
unsigned sample_mask;
unsigned min_samples;
- boolean vbo_push_hint;
+ bool vbo_push_hint;
uint32_t rt_array_mode;
struct pipe_query *cond_query;
- boolean cond_cond; /* inverted rendering condition */
+ bool cond_cond; /* inverted rendering condition */
uint cond_mode;
uint32_t cond_condmode; /* the calculated condition */
struct nv50_blitctx *blit;
};
-static INLINE struct nv50_context *
+static inline struct nv50_context *
nv50_context(struct pipe_context *pipe)
{
return (struct nv50_context *)pipe;
}
/* return index used in nv50_context arrays for a specific shader type */
-static INLINE unsigned
+static inline unsigned
nv50_context_shader_stage(unsigned pipe)
{
switch (pipe) {
@@ -188,7 +188,7 @@ nv50_context_shader_stage(unsigned pipe)
/* nv50_context.c */
struct pipe_context *nv50_create(struct pipe_screen *, void *);
-void nv50_bufctx_fence(struct nouveau_bufctx *, boolean on_flush);
+void nv50_bufctx_fence(struct nouveau_bufctx *, bool on_flush);
void nv50_default_kick_notify(struct nouveau_pushbuf *);
@@ -202,7 +202,7 @@ void nv50_query_pushbuf_submit(struct nouveau_pushbuf *,
void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *);
void nva0_so_target_save_offset(struct pipe_context *,
struct pipe_stream_output_target *,
- unsigned index, boolean seralize);
+ unsigned index, bool serialize);
#define NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0)
@@ -221,8 +221,8 @@ extern void nv50_init_state_functions(struct nv50_context *);
/* nv50_state_validate.c */
/* @words: check for space before emitting relocs */
-extern boolean nv50_state_validate(struct nv50_context *, uint32_t state_mask,
- unsigned space_words);
+extern bool nv50_state_validate(struct nv50_context *, uint32_t state_mask,
+ unsigned space_words);
/* nv50_surface.c */
extern void nv50_clear(struct pipe_context *, unsigned buffers,
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_formats.c b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
index 0f86ba1de0d..49a93bf1d91 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_formats.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
@@ -44,7 +44,7 @@
*/
#define U_V PIPE_BIND_VERTEX_BUFFER
#define U_T PIPE_BIND_SAMPLER_VIEW
-#define U_I PIPE_BIND_SHADER_RESOURCE | PIPE_BIND_COMPUTE_RESOURCE
+#define U_I PIPE_BIND_SHADER_BUFFER | PIPE_BIND_SHADER_IMAGE | PIPE_BIND_COMPUTE_RESOURCE
#define U_TR PIPE_BIND_RENDER_TARGET | U_T
#define U_IR U_TR | U_I
#define U_TB PIPE_BIND_BLENDABLE | U_TR
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
index f15d8f3ecb6..92d49e49ff2 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
@@ -30,7 +30,7 @@
uint32_t
nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz,
- boolean is_3d)
+ bool is_3d)
{
uint32_t tile_mode = 0x000;
@@ -59,13 +59,13 @@ nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz,
}
static uint32_t
-nv50_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz, boolean is_3d)
+nv50_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz, bool is_3d)
{
return nv50_tex_choose_tile_dims_helper(nx, ny * 2, nz, is_3d);
}
static uint32_t
-nv50_mt_choose_storage_type(struct nv50_miptree *mt, boolean compressed)
+nv50_mt_choose_storage_type(struct nv50_miptree *mt, bool compressed)
{
const unsigned ms = util_logbase2(mt->base.base.nr_samples);
uint32_t tile_flags;
@@ -184,7 +184,7 @@ nv50_miptree_get_handle(struct pipe_screen *pscreen,
unsigned stride;
if (!mt || !mt->base.bo)
- return FALSE;
+ return false;
stride = mt->level[0].pitch;
@@ -204,7 +204,7 @@ const struct u_resource_vtbl nv50_miptree_vtbl =
u_default_transfer_inline_write /* transfer_inline_write */
};
-static INLINE boolean
+static inline bool
nv50_miptree_init_ms_mode(struct nv50_miptree *mt)
{
switch (mt->base.base.nr_samples) {
@@ -228,12 +228,12 @@ nv50_miptree_init_ms_mode(struct nv50_miptree *mt)
break;
default:
NOUVEAU_ERR("invalid nr_samples: %u\n", mt->base.base.nr_samples);
- return FALSE;
+ return false;
}
- return TRUE;
+ return true;
}
-boolean
+bool
nv50_miptree_init_layout_linear(struct nv50_miptree *mt, unsigned pitch_align)
{
struct pipe_resource *pt = &mt->base.base;
@@ -241,12 +241,12 @@ nv50_miptree_init_layout_linear(struct nv50_miptree *mt, unsigned pitch_align)
unsigned h = pt->height0;
if (util_format_is_depth_or_stencil(pt->format))
- return FALSE;
+ return false;
if ((pt->last_level > 0) || (pt->depth0 > 1) || (pt->array_size > 1))
- return FALSE;
+ return false;
if (mt->ms_x | mt->ms_y)
- return FALSE;
+ return false;
mt->level[0].pitch = align(pt->width0 * blocksize, pitch_align);
@@ -256,7 +256,7 @@ nv50_miptree_init_layout_linear(struct nv50_miptree *mt, unsigned pitch_align)
mt->total_size = mt->level[0].pitch * h;
- return TRUE;
+ return true;
}
static void
@@ -335,7 +335,7 @@ nv50_miptree_create(struct pipe_screen *pscreen,
struct nouveau_device *dev = nouveau_screen(pscreen)->device;
struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree);
struct pipe_resource *pt = &mt->base.base;
- boolean compressed = dev->drm_version >= 0x01000101;
+ bool compressed = dev->drm_version >= 0x01000101;
int ret;
union nouveau_bo_config bo_config;
uint32_t bo_flags;
@@ -438,7 +438,7 @@ nv50_miptree_from_handle(struct pipe_screen *pscreen,
/* Offset of zslice @z from start of level @l. */
-INLINE unsigned
+inline unsigned
nv50_mt_zslice_offset(const struct nv50_miptree *mt, unsigned l, unsigned z)
{
const struct pipe_resource *pt = &mt->base.base;
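The linear-layout path above derives the pitch with gallium's align(), which rounds a byte count up to a power-of-two alignment; a standalone equivalent (valid only for power-of-two alignments):

#include <stdio.h>

static unsigned
align_pot(unsigned value, unsigned alignment)
{
   /* power-of-two round-up: add alignment-1, then clear the low bits */
   return (value + alignment - 1) & ~(alignment - 1);
}

int
main(void)
{
   /* e.g. a 100-pixel-wide RGBA8 surface (400 bytes) with a 64-byte pitch
    * requirement */
   printf("%u\n", align_pot(100 * 4, 64)); /* 448 */
   return 0;
}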
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index aaca4c550d9..02dc3677259 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -25,7 +25,7 @@
#include "codegen/nv50_ir_driver.h"
-static INLINE unsigned
+static inline unsigned
bitcount4(const uint32_t val)
{
static const uint8_t cnt[16]
@@ -104,7 +104,7 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info)
prog->vp.bfc[info->out[i].si] = i;
break;
case TGSI_SEMANTIC_LAYER:
- prog->gp.has_layer = TRUE;
+ prog->gp.has_layer = true;
prog->gp.layerid = n;
break;
case TGSI_SEMANTIC_VIEWPORT_INDEX:
@@ -316,7 +316,7 @@ nv50_program_create_strmout_state(const struct nv50_ir_prog_info *info,
return so;
}
-boolean
+bool
nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
{
struct nv50_ir_prog_info *info;
@@ -325,7 +325,7 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
info = CALLOC_STRUCT(nv50_ir_prog_info);
if (!info)
- return FALSE;
+ return false;
info->type = prog->type;
info->target = chipset;
@@ -410,7 +410,7 @@ out:
return !ret;
}
-boolean
+bool
nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
{
struct nouveau_heap *heap;
@@ -423,7 +423,7 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
case PIPE_SHADER_FRAGMENT: heap = nv50->screen->gp_code_heap; break;
default:
assert(!"invalid program type");
- return FALSE;
+ return false;
}
ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
@@ -440,7 +440,7 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
if (ret) {
NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size);
- return FALSE;
+ return false;
}
}
prog->code_base = prog->mem->start;
@@ -448,10 +448,10 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
ret = nv50_tls_realloc(nv50->screen, prog->tls_space);
if (ret < 0) {
nouveau_heap_free(&prog->mem);
- return FALSE;
+ return false;
}
if (ret > 0)
- nv50->state.new_tls_space = TRUE;
+ nv50->state.new_tls_space = true;
if (prog->fixups)
nv50_ir_relocate_code(prog->fixups, prog->code, prog->code_base, 0, 0);
@@ -463,7 +463,7 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1);
PUSH_DATA (nv50->base.pushbuf, 0);
- return TRUE;
+ return true;
}
void
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h
index fe6bd6025be..5d3ff5644d2 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h
@@ -53,7 +53,7 @@ struct nv50_program {
struct pipe_shader_state pipe;
ubyte type;
- boolean translated;
+ bool translated;
uint32_t *code;
unsigned code_size;
@@ -104,8 +104,8 @@ struct nv50_program {
struct nv50_stream_output_state *so;
};
-boolean nv50_program_translate(struct nv50_program *, uint16_t chipset);
-boolean nv50_program_upload_code(struct nv50_context *, struct nv50_program *);
+bool nv50_program_translate(struct nv50_program *, uint16_t chipset);
+bool nv50_program_upload_code(struct nv50_context *, struct nv50_program *);
void nv50_program_destroy(struct nv50_context *, struct nv50_program *);
#endif /* __NV50_PROG_H__ */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_push.c b/src/gallium/drivers/nouveau/nv50/nv50_push.c
index a3a397c52c1..f31eaa0e314 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_push.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_push.c
@@ -23,13 +23,13 @@ struct push_context {
struct translate *translate;
- boolean primitive_restart;
+ bool primitive_restart;
uint32_t prim;
uint32_t restart_index;
uint32_t instance_id;
};
-static INLINE unsigned
+static inline unsigned
prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index)
{
unsigned i;
@@ -39,7 +39,7 @@ prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index)
return i;
}
-static INLINE unsigned
+static inline unsigned
prim_restart_search_i16(uint16_t *elts, unsigned push, uint16_t index)
{
unsigned i;
@@ -49,7 +49,7 @@ prim_restart_search_i16(uint16_t *elts, unsigned push, uint16_t index)
return i;
}
-static INLINE unsigned
+static inline unsigned
prim_restart_search_i32(uint32_t *elts, unsigned push, uint32_t index)
{
unsigned i;
@@ -179,7 +179,7 @@ emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count)
#define NV50_PRIM_GL_CASE(n) \
case PIPE_PRIM_##n: return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
-static INLINE unsigned
+static inline unsigned
nv50_prim_gl(unsigned prim)
{
switch (prim) {
@@ -212,7 +212,7 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
unsigned i, index_size;
unsigned inst_count = info->instance_count;
unsigned vert_count = info->count;
- boolean apply_bias = info->indexed && info->index_bias;
+ bool apply_bias = info->indexed && info->index_bias;
ctx.push = nv50->base.pushbuf;
ctx.translate = nv50->vertex->translate;
@@ -258,12 +258,12 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
NOUVEAU_ERR("draw_stream_output not supported on pre-NVA0 cards\n");
return;
}
- pipe->get_query_result(pipe, targ->pq, TRUE, (void *)&vert_count);
+ pipe->get_query_result(pipe, targ->pq, true, (void *)&vert_count);
vert_count /= targ->stride;
}
ctx.idxbuf = NULL;
index_size = 0;
- ctx.primitive_restart = FALSE;
+ ctx.primitive_restart = false;
ctx.restart_index = 0;
}
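The prim_restart_search_i{08,16,32} helpers above bound each vertex push at the next restart marker; a standalone sketch of the 16-bit variant:

#include <stdint.h>
#include <stdio.h>

/* Returns how many indices can be pushed before hitting the restart index,
 * mirroring prim_restart_search_i16() above. */
static unsigned
restart_search_u16(const uint16_t *elts, unsigned push, uint16_t index)
{
   unsigned i;
   for (i = 0; i < push && elts[i] != index; ++i);
   return i;
}

int
main(void)
{
   const uint16_t elts[] = { 0, 1, 2, 0xffff, 3, 4 };
   /* 3 indices precede the restart marker, so one emit covers elts[0..2] */
   printf("%u\n", restart_search_u16(elts, 6, 0xffff));
   return 0;
}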
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index 81f7474e36b..f4adbf8c653 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -48,20 +48,21 @@ struct nv50_query {
uint32_t base;
uint32_t offset; /* base + i * 32 */
uint8_t state;
- boolean is64bit;
+ bool is64bit;
+ int nesting; /* only used for occlusion queries */
struct nouveau_mm_allocation *mm;
struct nouveau_fence *fence;
};
#define NV50_QUERY_ALLOC_SPACE 256
-static INLINE struct nv50_query *
+static inline struct nv50_query *
nv50_query(struct pipe_query *pipe)
{
return (struct nv50_query *)pipe;
}
-static boolean
+static bool
nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size)
{
struct nv50_screen *screen = nv50->screen;
@@ -80,17 +81,17 @@ nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size)
if (size) {
q->mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base);
if (!q->bo)
- return FALSE;
+ return false;
q->offset = q->base;
ret = nouveau_bo_map(q->bo, 0, screen->base.client);
if (ret) {
nv50_query_allocate(nv50, q, 0);
- return FALSE;
+ return false;
}
q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base);
}
- return TRUE;
+ return true;
}
static void
@@ -153,8 +154,8 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
struct nv50_query *q = nv50_query(pq);
/* For occlusion queries we have to change the storage, because a previous
- * query might set the initial render conition to FALSE even *after* we re-
- * initialized it to TRUE.
+ * query might set the initial render condition to false even *after* we re-
+ * initialized it to true.
*/
if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
q->offset += 32;
@@ -166,7 +167,7 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
* query ?
*/
q->data[0] = q->sequence; /* initialize sequence */
- q->data[1] = 1; /* initial render condition = TRUE */
+ q->data[1] = 1; /* initial render condition = true */
q->data[4] = q->sequence + 1; /* for comparison COND_MODE */
q->data[5] = 0;
}
@@ -175,11 +176,16 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
- PUSH_SPACE(push, 4);
- BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
- PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT);
- BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
- PUSH_DATA (push, 1);
+ q->nesting = nv50->screen->num_occlusion_queries_active++;
+ if (q->nesting) {
+ nv50_query_get(push, q, 0x10, 0x0100f002);
+ } else {
+ PUSH_SPACE(push, 4);
+ BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
+ PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT);
+ BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
+ PUSH_DATA (push, 1);
+ }
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
nv50_query_get(push, q, 0x10, 0x06805002);
@@ -223,9 +229,11 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
nv50_query_get(push, q, 0, 0x0100f002);
- PUSH_SPACE(push, 2);
- BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
- PUSH_DATA (push, 0);
+ if (--nv50->screen->num_occlusion_queries_active == 0) {
+ PUSH_SPACE(push, 2);
+ BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ }
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
nv50_query_get(push, q, 0, 0x06805002);
@@ -261,7 +269,7 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
nv50_query_get(push, q, 0, 0x0d005002 | (q->index << 5));
break;
case PIPE_QUERY_TIMESTAMP_DISJOINT:
- /* This query is not issued on GPU because disjoint is forced to FALSE */
+ /* This query is not issued on GPU because disjoint is forced to false */
q->state = NV50_QUERY_STATE_READY;
break;
default:
@@ -273,7 +281,7 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
nouveau_fence_ref(nv50->screen->base.fence.current, &q->fence);
}
-static INLINE void
+static inline void
nv50_query_update(struct nv50_query *q)
{
if (q->is64bit) {
@@ -293,7 +301,7 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
struct nv50_query *q = nv50_query(pq);
uint64_t *res64 = (uint64_t *)result;
uint32_t *res32 = (uint32_t *)result;
- boolean *res8 = (boolean *)result;
+ uint8_t *res8 = (uint8_t *)result;
uint64_t *data64 = (uint64_t *)q->data;
int i;
@@ -307,19 +315,19 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
q->state = NV50_QUERY_STATE_FLUSHED;
PUSH_KICK(nv50->base.pushbuf);
}
- return FALSE;
+ return false;
}
if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nv50->screen->base.client))
- return FALSE;
+ return false;
}
q->state = NV50_QUERY_STATE_READY;
switch (q->type) {
case PIPE_QUERY_GPU_FINISHED:
- res8[0] = TRUE;
+ res8[0] = true;
break;
case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
- res64[0] = q->data[1];
+ res64[0] = q->data[1] - q->data[5];
break;
case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
@@ -338,7 +346,7 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
break;
case PIPE_QUERY_TIMESTAMP_DISJOINT:
res64[0] = 1000000000;
- res8[8] = FALSE;
+ res8[8] = false;
break;
case PIPE_QUERY_TIME_ELAPSED:
res64[0] = data64[1] - data64[3];
@@ -347,10 +355,10 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
res32[0] = q->data[1];
break;
default:
- return FALSE;
+ return false;
}
- return TRUE;
+ return true;
}
void
@@ -377,7 +385,7 @@ nv50_render_condition(struct pipe_context *pipe,
struct nouveau_pushbuf *push = nv50->base.pushbuf;
struct nv50_query *q;
uint32_t cond;
- boolean wait =
+ bool wait =
mode != PIPE_RENDER_COND_NO_WAIT &&
mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;
@@ -391,13 +399,12 @@ nv50_render_condition(struct pipe_context *pipe,
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
cond = condition ? NV50_3D_COND_MODE_EQUAL :
NV50_3D_COND_MODE_NOT_EQUAL;
- wait = TRUE;
+ wait = true;
break;
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
if (likely(!condition)) {
- /* XXX: Placeholder, handle nesting here if available */
- if (unlikely(false))
+ if (unlikely(q->nesting))
cond = wait ? NV50_3D_COND_MODE_NOT_EQUAL :
NV50_3D_COND_MODE_ALWAYS;
else
@@ -461,7 +468,7 @@ nv50_query_pushbuf_submit(struct nouveau_pushbuf *push,
void
nva0_so_target_save_offset(struct pipe_context *pipe,
struct pipe_stream_output_target *ptarg,
- unsigned index, boolean serialize)
+ unsigned index, bool serialize)
{
struct nv50_so_target *targ = nv50_so_target(ptarg);
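A standalone model (not driver code) of the nesting scheme introduced above: only the outermost occlusion query resets the hardware sample counter; a nested query merely snapshots the counter at begin time, and its result is reported as end minus begin, which is what the q->data[1] - q->data[5] change in nv50_query_result implements.

#include <stdint.h>
#include <stdio.h>

static int      queries_active;  /* mirrors screen->num_occlusion_queries_active */
static uint32_t hw_samplecnt;    /* mirrors the GPU's pass-counter register */

struct query { uint32_t begin, end; };

static void
query_begin(struct query *q)
{
   if (queries_active++ == 0)
      hw_samplecnt = 0;          /* outermost query: COUNTER_RESET */
   q->begin = hw_samplecnt;      /* nested: snapshot only (data[5]) */
}

static void
query_end(struct query *q)
{
   q->end = hw_samplecnt;        /* data[1] */
   --queries_active;             /* SAMPLECNT_ENABLE goes off at zero */
}

int
main(void)
{
   struct query outer, inner;
   query_begin(&outer);
   hw_samplecnt += 10;           /* 10 samples pass */
   query_begin(&inner);
   hw_samplecnt += 5;            /* 5 more pass inside the nested query */
   query_end(&inner);
   query_end(&outer);
   printf("outer=%u inner=%u\n", outer.end - outer.begin,
          inner.end - inner.begin); /* outer=15 inner=5 */
   return 0;
}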
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_resource.h b/src/gallium/drivers/nouveau/nv50/nv50_resource.h
index f7ee1354a92..a46e622c597 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_resource.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_resource.h
@@ -35,7 +35,7 @@ nv50_screen_init_resource_functions(struct pipe_screen *pscreen);
uint32_t
nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz,
- boolean is_3d);
+ bool is_3d);
struct nv50_miptree_level {
uint32_t offset;
@@ -50,13 +50,13 @@ struct nv50_miptree {
struct nv50_miptree_level level[NV50_MAX_TEXTURE_LEVELS];
uint32_t total_size;
uint32_t layer_stride;
- boolean layout_3d; /* TRUE if layer count varies with mip level */
+ bool layout_3d; /* true if layer count varies with mip level */
uint8_t ms_x; /* log2 of number of samples in x/y dimension */
uint8_t ms_y;
uint8_t ms_mode;
};
-static INLINE struct nv50_miptree *
+static inline struct nv50_miptree *
nv50_miptree(struct pipe_resource *pt)
{
return (struct nv50_miptree *)pt;
@@ -70,7 +70,7 @@ nv50_miptree(struct pipe_resource *pt)
/* Internal functions:
*/
-boolean
+bool
nv50_miptree_init_layout_linear(struct nv50_miptree *mt, unsigned pitch_align);
struct pipe_resource *
@@ -98,13 +98,13 @@ struct nv50_surface {
uint16_t depth;
};
-static INLINE struct nv50_surface *
+static inline struct nv50_surface *
nv50_surface(struct pipe_surface *ps)
{
return (struct nv50_surface *)ps;
}
-static INLINE enum pipe_format
+static inline enum pipe_format
nv50_zs_to_s_format(enum pipe_format format)
{
switch (format) {
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 6583a353578..30e6e042fbf 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -51,19 +51,19 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
unsigned bindings)
{
if (sample_count > 8)
- return FALSE;
+ return false;
if (!(0x117 & (1 << sample_count))) /* 0, 1, 2, 4 or 8 */
- return FALSE;
+ return false;
if (sample_count == 8 && util_format_get_blocksizebits(format) >= 128)
- return FALSE;
+ return false;
if (!util_format_is_supported(format, bindings))
- return FALSE;
+ return false;
switch (format) {
case PIPE_FORMAT_Z16_UNORM:
if (nv50_screen(pscreen)->tesla->oclass < NVA0_3D_CLASS)
- return FALSE;
+ return false;
break;
default:
break;
@@ -176,6 +176,9 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_CLIP_HALFZ:
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP:
return 1; /* class_3d >= NVA0_3D_CLASS; */
@@ -210,6 +213,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: /* potentially supported on some hw */
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
return 0;
case PIPE_CAP_VENDOR_ID:
@@ -286,7 +290,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
/* The chip could handle more sampler views than samplers */
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
- return MIN2(32, PIPE_MAX_SAMPLERS);
+ return MIN2(16, PIPE_MAX_SAMPLERS);
case PIPE_SHADER_CAP_DOUBLES:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
@@ -454,7 +458,7 @@ nv50_screen_init_hwctx(struct nv50_screen *screen)
BEGIN_NV04(push, NV50_3D(UNK1400_LANES), 1);
PUSH_DATA (push, 0xf);
- if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", TRUE)) {
+ if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", true)) {
BEGIN_NV04(push, NV50_3D(WATCHDOG_TIMER), 1);
PUSH_DATA (push, 0x18);
}
@@ -734,7 +738,7 @@ nv50_screen_create(struct nouveau_device *dev)
nv50_screen_init_resource_functions(pscreen);
if (screen->base.device->chipset < 0x84 ||
- debug_get_bool_option("NOUVEAU_PMPEG", FALSE)) {
+ debug_get_bool_option("NOUVEAU_PMPEG", false)) {
/* PMPEG */
nouveau_screen_init_vdec(&screen->base);
} else if (screen->base.device->chipset < 0x98 ||
@@ -890,7 +894,7 @@ nv50_screen_create(struct nouveau_device *dev)
nv50_screen_init_hwctx(screen);
- nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE);
+ nouveau_fence_new(&screen->base, &screen->base.fence.current, false);
return pscreen;
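The 0x117 test in nv50_screen_is_format_supported above is a bitmask of the legal sample counts, as its inline comment notes (bits 0, 1, 2, 4 and 8 are set); a quick standalone check:

#include <stdio.h>

int
main(void)
{
   for (int n = 0; n <= 8; ++n)
      if (0x117 & (1 << n))
         printf("%d ", n);   /* prints: 0 1 2 4 8 */
   printf("\n");
   return 0;
}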
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
index 881051b1862..ce51f0fc254 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
@@ -32,14 +32,14 @@ struct nv50_graph_state {
uint32_t semantic_color;
uint32_t semantic_psize;
int32_t index_bias;
- boolean uniform_buffer_bound[3];
- boolean prim_restart;
- boolean point_sprite;
- boolean rt_serialize;
- boolean flushed;
- boolean rasterizer_discard;
+ bool uniform_buffer_bound[3];
+ bool prim_restart;
+ bool point_sprite;
+ bool rt_serialize;
+ bool flushed;
+ bool rasterizer_discard;
uint8_t tls_required;
- boolean new_tls_space;
+ bool new_tls_space;
uint8_t num_vtxbufs;
uint8_t num_vtxelts;
uint8_t num_textures[3];
@@ -54,6 +54,8 @@ struct nv50_screen {
struct nv50_context *cur_ctx;
struct nv50_graph_state save_state;
+ int num_occlusion_queries_active;
+
struct nouveau_bo *code;
struct nouveau_bo *uniforms;
struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
@@ -95,19 +97,19 @@ struct nv50_screen {
struct nouveau_object *m2mf;
};
-static INLINE struct nv50_screen *
+static inline struct nv50_screen *
nv50_screen(struct pipe_screen *screen)
{
return (struct nv50_screen *)screen;
}
-boolean nv50_blitter_create(struct nv50_screen *);
+bool nv50_blitter_create(struct nv50_screen *);
void nv50_blitter_destroy(struct nv50_screen *);
int nv50_screen_tic_alloc(struct nv50_screen *, void *);
int nv50_screen_tsc_alloc(struct nv50_screen *, void *);
-static INLINE void
+static inline void
nv50_resource_fence(struct nv04_resource *res, uint32_t flags)
{
struct nv50_screen *screen = nv50_screen(res->base.screen);
@@ -119,7 +121,7 @@ nv50_resource_fence(struct nv04_resource *res, uint32_t flags)
}
}
-static INLINE void
+static inline void
nv50_resource_validate(struct nv04_resource *res, uint32_t flags)
{
if (likely(res->bo)) {
@@ -142,21 +144,21 @@ struct nv50_format {
extern const struct nv50_format nv50_format_table[];
-static INLINE void
+static inline void
nv50_screen_tic_unlock(struct nv50_screen *screen, struct nv50_tic_entry *tic)
{
if (tic->id >= 0)
screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32));
}
-static INLINE void
+static inline void
nv50_screen_tsc_unlock(struct nv50_screen *screen, struct nv50_tsc_entry *tsc)
{
if (tsc->id >= 0)
screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32));
}
-static INLINE void
+static inline void
nv50_screen_tic_free(struct nv50_screen *screen, struct nv50_tic_entry *tic)
{
if (tic->id >= 0) {
@@ -165,7 +167,7 @@ nv50_screen_tic_free(struct nv50_screen *screen, struct nv50_tic_entry *tic)
}
}
-static INLINE void
+static inline void
nv50_screen_tsc_free(struct nv50_screen *screen, struct nv50_tsc_entry *tsc)
{
if (tsc->id >= 0) {
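The tic/tsc lock and free helpers above maintain a plain bitmap, one bit per table entry, word-indexed by id / 32; a standalone version of the idiom:

#include <stdint.h>
#include <stdio.h>

static uint32_t lock[4]; /* covers 128 entries */

static void bitmap_lock(int id)   { lock[id / 32] |=  (1u << (id % 32)); }
static void bitmap_unlock(int id) { lock[id / 32] &= ~(1u << (id % 32)); }

int
main(void)
{
   bitmap_lock(40);
   printf("word1=0x%08x\n", lock[1]); /* bit 8 of word 1 set: 0x00000100 */
   bitmap_unlock(40);
   printf("word1=0x%08x\n", lock[1]); /* 0x00000000 */
   return 0;
}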
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
index c698782d8bd..b033ce5c6dc 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
@@ -60,7 +60,7 @@ nv50_constbufs_validate(struct nv50_context *nv50)
continue;
}
if (!nv50->state.uniform_buffer_bound[s]) {
- nv50->state.uniform_buffer_bound[s] = TRUE;
+ nv50->state.uniform_buffer_bound[s] = true;
BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);
PUSH_DATA (push, (b << 12) | (i << 8) | p | 1);
}
@@ -99,33 +99,35 @@ nv50_constbufs_validate(struct nv50_context *nv50)
PUSH_DATA (push, (b << 12) | (i << 8) | p | 1);
BCTX_REFN(nv50->bufctx_3d, CB(s, i), res, RD);
+
+ nv50->cb_dirty = 1; /* Force cache flush for UBO. */
} else {
BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);
PUSH_DATA (push, (i << 8) | p | 0);
}
if (i == 0)
- nv50->state.uniform_buffer_bound[s] = FALSE;
+ nv50->state.uniform_buffer_bound[s] = false;
}
}
}
}
-static boolean
+static bool
nv50_program_validate(struct nv50_context *nv50, struct nv50_program *prog)
{
if (!prog->translated) {
prog->translated = nv50_program_translate(
prog, nv50->screen->base.device->chipset);
if (!prog->translated)
- return FALSE;
+ return false;
} else
if (prog->mem)
- return TRUE;
+ return true;
return nv50_program_upload_code(nv50, prog);
}
-static INLINE void
+static inline void
nv50_program_update_context_state(struct nv50_context *nv50,
struct nv50_program *prog, int stage)
{
@@ -136,7 +138,7 @@ nv50_program_update_context_state(struct nv50_context *nv50,
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TLS);
if (!nv50->state.tls_required || nv50->state.new_tls_space)
BCTX_REFN_bo(nv50->bufctx_3d, TLS, flags, nv50->screen->tls_bo);
- nv50->state.new_tls_space = FALSE;
+ nv50->state.new_tls_space = false;
nv50->state.tls_required |= 1 << stage;
} else {
if (nv50->state.tls_required == (1 << stage))
@@ -243,11 +245,11 @@ nv50_sprite_coords_validate(struct nv50_context *nv50)
for (i = 0; i < 8; ++i)
PUSH_DATA(push, 0);
- nv50->state.point_sprite = FALSE;
+ nv50->state.point_sprite = false;
}
return;
} else {
- nv50->state.point_sprite = TRUE;
+ nv50->state.point_sprite = true;
}
memset(pntc, 0, sizeof(pntc));
@@ -646,7 +648,7 @@ nv50_stream_output_validate(struct nv50_context *nv50)
nv50_query_pushbuf_submit(push, targ->pq, 0x4);
} else {
PUSH_DATA(push, 0);
- targ->clean = FALSE;
+ targ->clean = false;
}
} else {
const unsigned limit = targ->pipe.buffer_size /
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c
index d4d41af3c61..9505a0b4085 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -62,7 +62,7 @@
* in advance to maintain elegant separate shader objects.)
*/
-static INLINE uint32_t
+static inline uint32_t
nv50_colormask(unsigned mask)
{
uint32_t ret = 0;
@@ -82,7 +82,7 @@ nv50_colormask(unsigned mask)
#define NV50_BLEND_FACTOR_CASE(a, b) \
case PIPE_BLENDFACTOR_##a: return NV50_BLEND_FACTOR_##b
-static INLINE uint32_t
+static inline uint32_t
nv50_blend_fac(unsigned factor)
{
switch (factor) {
@@ -116,7 +116,7 @@ nv50_blend_state_create(struct pipe_context *pipe,
{
struct nv50_blend_stateobj *so = CALLOC_STRUCT(nv50_blend_stateobj);
int i;
- boolean emit_common_func = cso->rt[0].blend_enable;
+ bool emit_common_func = cso->rt[0].blend_enable;
uint32_t ms;
if (nv50_context(pipe)->screen->tesla->oclass >= NVA3_3D_CLASS) {
@@ -137,11 +137,11 @@ nv50_blend_state_create(struct pipe_context *pipe,
for (i = 0; i < 8; ++i) {
SB_DATA(so, cso->rt[i].blend_enable);
if (cso->rt[i].blend_enable)
- emit_common_func = TRUE;
+ emit_common_func = true;
}
if (nv50_context(pipe)->screen->tesla->oclass >= NVA3_3D_CLASS) {
- emit_common_func = FALSE;
+ emit_common_func = false;
for (i = 0; i < 8; ++i) {
if (!cso->rt[i].blend_enable)
@@ -373,6 +373,16 @@ nv50_zsa_state_create(struct pipe_context *pipe,
SB_DATA (so, 0);
}
+ SB_BEGIN_3D(so, DEPTH_BOUNDS_EN, 1);
+ if (cso->depth.bounds_test) {
+ SB_DATA (so, 1);
+ SB_BEGIN_3D(so, DEPTH_BOUNDS(0), 2);
+ SB_DATA (so, fui(cso->depth.bounds_min));
+ SB_DATA (so, fui(cso->depth.bounds_max));
+ } else {
+ SB_DATA (so, 0);
+ }
+
if (cso->stencil[0].enabled) {
SB_BEGIN_3D(so, STENCIL_ENABLE, 5);
SB_DATA (so, 1);
@@ -439,7 +449,7 @@ nv50_zsa_state_delete(struct pipe_context *pipe, void *hwcso)
#define NV50_TSC_WRAP_CASE(n) \
case PIPE_TEX_WRAP_##n: return NV50_TSC_WRAP_##n
-static INLINE unsigned
+static inline unsigned
nv50_tsc_wrap_mode(unsigned wrap)
{
switch (wrap) {
@@ -572,7 +582,7 @@ nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
FREE(hwcso);
}
-static INLINE void
+static inline void
nv50_stage_sampler_states_bind(struct nv50_context *nv50, int s,
unsigned nr, void **hwcso)
{
@@ -650,7 +660,7 @@ nv50_sampler_view_destroy(struct pipe_context *pipe,
FREE(nv50_tic_entry(view));
}
-static INLINE void
+static inline void
nv50_stage_set_sampler_views(struct nv50_context *nv50, int s,
unsigned nr,
struct pipe_sampler_view **views)
@@ -808,7 +818,7 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
pipe_resource_reference(&nv50->constbuf[s][i].u.buf, res);
- nv50->constbuf[s][i].user = (cb && cb->user_buffer) ? TRUE : FALSE;
+ nv50->constbuf[s][i].user = (cb && cb->user_buffer) ? true : false;
if (nv50->constbuf[s][i].user) {
nv50->constbuf[s][i].u.data = cb->user_buffer;
nv50->constbuf[s][i].size = MIN2(cb->buffer_size, 0x10000);
@@ -1041,7 +1051,7 @@ nv50_so_target_create(struct pipe_context *pipe,
} else {
targ->pq = NULL;
}
- targ->clean = TRUE;
+ targ->clean = true;
targ->pipe.buffer_size = size;
targ->pipe.buffer_offset = offset;
@@ -1075,32 +1085,32 @@ nv50_set_stream_output_targets(struct pipe_context *pipe,
{
struct nv50_context *nv50 = nv50_context(pipe);
unsigned i;
- boolean serialize = TRUE;
- const boolean can_resume = nv50->screen->base.class_3d >= NVA0_3D_CLASS;
+ bool serialize = true;
+ const bool can_resume = nv50->screen->base.class_3d >= NVA0_3D_CLASS;
assert(num_targets <= 4);
for (i = 0; i < num_targets; ++i) {
- const boolean changed = nv50->so_target[i] != targets[i];
- const boolean append = (offsets[i] == (unsigned)-1);
+ const bool changed = nv50->so_target[i] != targets[i];
+ const bool append = (offsets[i] == (unsigned)-1);
if (!changed && append)
continue;
nv50->so_targets_dirty |= 1 << i;
if (can_resume && changed && nv50->so_target[i]) {
nva0_so_target_save_offset(pipe, nv50->so_target[i], i, serialize);
- serialize = FALSE;
+ serialize = false;
}
if (targets[i] && !append)
- nv50_so_target(targets[i])->clean = TRUE;
+ nv50_so_target(targets[i])->clean = true;
pipe_so_target_reference(&nv50->so_target[i], targets[i]);
}
for (; i < nv50->num_so_targets; ++i) {
if (can_resume && nv50->so_target[i]) {
nva0_so_target_save_offset(pipe, nv50->so_target[i], i, serialize);
- serialize = FALSE;
+ serialize = false;
}
pipe_so_target_reference(&nv50->so_target[i], NULL);
nv50->so_targets_dirty |= 1 << i;
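The new DEPTH_BOUNDS hunk above pushes the min/max bounds as raw float bits via gallium's fui(); a standalone equivalent showing the words that end up in the pushbuf:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* fui() in gallium reinterprets a float's bits as a uint32_t so it can be
 * pushed as a method parameter; a portable equivalent: */
static uint32_t
fui(float f)
{
   uint32_t u;
   memcpy(&u, &f, sizeof(u));
   return u;
}

int
main(void)
{
   /* the values pushed for DEPTH_BOUNDS(0..1) with bounds [0.0, 1.0] */
   printf("min=0x%08x max=0x%08x\n", fui(0.0f), fui(1.0f));
   /* prints min=0x00000000 max=0x3f800000 */
   return 0;
}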
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
index 116bf4bba7c..985603df5fa 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
@@ -2,7 +2,7 @@
#include "nv50/nv50_context.h"
#include "nv50/nv50_defs.xml.h"
-static INLINE void
+static inline void
nv50_fb_set_null_rt(struct nouveau_pushbuf *push, unsigned i)
{
BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(i)), 4);
@@ -82,7 +82,7 @@ nv50_validate_fb(struct nv50_context *nv50)
ms_mode = mt->ms_mode;
if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING)
- nv50->state.rt_serialize = TRUE;
+ nv50->state.rt_serialize = true;
mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
@@ -111,7 +111,7 @@ nv50_validate_fb(struct nv50_context *nv50)
ms_mode = mt->ms_mode;
if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING)
- nv50->state.rt_serialize = TRUE;
+ nv50->state.rt_serialize = true;
mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
@@ -275,7 +275,7 @@ nv50_validate_viewport(struct nv50_context *nv50)
nv50->viewports_dirty = 0;
}
-static INLINE void
+static inline void
nv50_check_program_ucps(struct nv50_context *nv50,
struct nv50_program *vp, uint8_t mask)
{
@@ -296,6 +296,23 @@ nv50_check_program_ucps(struct nv50_context *nv50,
nv50_fp_linkage_validate(nv50);
}
+/* alpha test is disabled if there are no color RTs, so make sure we have at
+ * least one if alpha test is enabled. Note that this must run after
+ * nv50_validate_fb, otherwise that will override the RT count setting.
+ */
+static void
+nv50_validate_derived_2(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+
+ if (nv50->zsa && nv50->zsa->pipe.alpha.enabled &&
+ nv50->framebuffer.nr_cbufs == 0) {
+ nv50_fb_set_null_rt(push, 0);
+ BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
+ PUSH_DATA (push, (076543210 << 4) | 1);
+ }
+}
+
static void
nv50_validate_clip(struct nv50_context *nv50)
{
@@ -456,6 +473,7 @@ static struct state_validate {
{ nv50_gp_linkage_validate, NV50_NEW_GMTYPROG | NV50_NEW_VERTPROG },
{ nv50_validate_derived_rs, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER |
NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
+ { nv50_validate_derived_2, NV50_NEW_ZSA | NV50_NEW_FRAMEBUFFER },
{ nv50_validate_clip, NV50_NEW_CLIP | NV50_NEW_RASTERIZER |
NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
{ nv50_constbufs_validate, NV50_NEW_CONSTBUF },
@@ -468,7 +486,7 @@ static struct state_validate {
};
#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
-boolean
+bool
nv50_state_validate(struct nv50_context *nv50, uint32_t mask, unsigned words)
{
uint32_t state_mask;
@@ -490,19 +508,19 @@ nv50_state_validate(struct nv50_context *nv50, uint32_t mask, unsigned words)
nv50->dirty &= ~state_mask;
if (nv50->state.rt_serialize) {
- nv50->state.rt_serialize = FALSE;
+ nv50->state.rt_serialize = false;
BEGIN_NV04(nv50->base.pushbuf, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
PUSH_DATA (nv50->base.pushbuf, 0);
}
- nv50_bufctx_fence(nv50->bufctx_3d, FALSE);
+ nv50_bufctx_fence(nv50->bufctx_3d, false);
}
nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx_3d);
ret = nouveau_pushbuf_validate(nv50->base.pushbuf);
if (unlikely(nv50->state.flushed)) {
- nv50->state.flushed = FALSE;
- nv50_bufctx_fence(nv50->bufctx_3d, TRUE);
+ nv50->state.flushed = false;
+ nv50_bufctx_fence(nv50->bufctx_3d, true);
}
return !ret;
}
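A standalone sketch of the dirty-mask dispatch nv50_state_validate runs over validate_list above: entries execute in table order whenever any of their state bits is dirty, which is why nv50_validate_derived_2 is placed after the framebuffer validation that would otherwise override its RT count. The names below are simplified stand-ins.

#include <stdint.h>
#include <stdio.h>

#define NEW_FB  (1 << 0)
#define NEW_ZSA (1 << 1)

static void validate_fb(void)        { puts("validate_fb"); }
static void validate_derived_2(void) { puts("validate_derived_2"); }

static const struct {
   void (*func)(void);
   uint32_t states;
} validate_list[] = {
   { validate_fb,        NEW_FB },
   { validate_derived_2, NEW_ZSA | NEW_FB },
};

int
main(void)
{
   uint32_t dirty = NEW_FB;       /* only the framebuffer changed */

   for (unsigned i = 0; i < 2; ++i)
      if (validate_list[i].states & dirty)
         validate_list[i].func(); /* both run; derived_2 also keys off FB */
   return 0;
}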
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h b/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h
index eea5327b6cb..cf75d1eb11b 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h
@@ -31,7 +31,7 @@ struct nv50_rasterizer_stateobj {
struct nv50_zsa_stateobj {
struct pipe_depth_stencil_alpha_state pipe;
int size;
- uint32_t state[29];
+ uint32_t state[34];
};
struct nv50_constbuf {
@@ -41,7 +41,7 @@ struct nv50_constbuf {
} u;
uint32_t size; /* max 65536 */
uint32_t offset;
- boolean user; /* should only be TRUE if u.data is valid and non-NULL */
+ bool user; /* should only be true if u.data is valid and non-NULL */
};
struct nv50_vertex_element {
@@ -56,7 +56,7 @@ struct nv50_vertex_stateobj {
unsigned num_elements;
uint32_t instance_elts;
uint32_t instance_bufs;
- boolean need_conversion;
+ bool need_conversion;
unsigned vertex_size;
unsigned packet_vertex_limit;
struct nv50_vertex_element element[0];
@@ -66,10 +66,10 @@ struct nv50_so_target {
struct pipe_stream_output_target pipe;
struct pipe_query *pq;
unsigned stride;
- boolean clean;
+ bool clean;
};
-static INLINE struct nv50_so_target *
+static inline struct nv50_so_target *
nv50_so_target(struct pipe_stream_output_target *ptarg)
{
return (struct nv50_so_target *)ptarg;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h b/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h
index 99548cbdb42..e0793bb6ec4 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h
@@ -9,7 +9,7 @@ struct nv50_tsc_entry {
uint32_t tsc[8];
};
-static INLINE struct nv50_tsc_entry *
+static inline struct nv50_tsc_entry *
nv50_tsc_entry(void *hwcso)
{
return (struct nv50_tsc_entry *)hwcso;
@@ -21,7 +21,7 @@ struct nv50_tic_entry {
uint32_t tic[8];
};
-static INLINE struct nv50_tic_entry *
+static inline struct nv50_tic_entry *
nv50_tic_entry(struct pipe_sampler_view *view)
{
return (struct nv50_tic_entry *)view;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
index dc9852d4e47..b1ae01692cb 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -49,8 +49,8 @@
#define NOUVEAU_DRIVER 0x50
#include "nv50/nv50_blit.h"
-static INLINE uint8_t
-nv50_2d_format(enum pipe_format format, boolean dst, boolean dst_src_equal)
+static inline uint8_t
+nv50_2d_format(enum pipe_format format, bool dst, bool dst_src_equal)
{
uint8_t id = nv50_format_table[format].rt;
@@ -76,7 +76,7 @@ nv50_2d_format(enum pipe_format format, boolean dst, boolean dst_src_equal)
static int
nv50_2d_texture_set(struct nouveau_pushbuf *push, int dst,
struct nv50_miptree *mt, unsigned level, unsigned layer,
- enum pipe_format pformat, boolean dst_src_pformat_equal)
+ enum pipe_format pformat, bool dst_src_pformat_equal)
{
struct nouveau_bo *bo = mt->base.bo;
uint32_t width, height, depth;
@@ -153,7 +153,7 @@ nv50_2d_texture_do_copy(struct nouveau_pushbuf *push,
const enum pipe_format dfmt = dst->base.base.format;
const enum pipe_format sfmt = src->base.base.format;
int ret;
- boolean eqfmt = dfmt == sfmt;
+ bool eqfmt = dfmt == sfmt;
if (!PUSH_SPACE(push, 2 * 16 + 32))
return PIPE_ERROR;
@@ -196,7 +196,7 @@ nv50_resource_copy_region(struct pipe_context *pipe,
{
struct nv50_context *nv50 = nv50_context(pipe);
int ret;
- boolean m2mf;
+ bool m2mf;
unsigned dst_layer = dstz, src_layer = src_box->z;
if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
@@ -658,7 +658,7 @@ nv50_blitter_make_vp(struct nv50_blitter *blit)
};
blit->vp.type = PIPE_SHADER_VERTEX;
- blit->vp.translated = TRUE;
+ blit->vp.translated = true;
blit->vp.code = (uint32_t *)code; /* const_cast */
blit->vp.code_size = sizeof(code);
blit->vp.max_gpr = 4;
@@ -687,24 +687,24 @@ nv50_blitter_make_fp(struct pipe_context *pipe,
const unsigned target = nv50_blit_get_tgsi_texture_target(ptarg);
- boolean tex_rgbaz = FALSE;
- boolean tex_s = FALSE;
- boolean cvt_un8 = FALSE;
+ bool tex_rgbaz = false;
+ bool tex_s = false;
+ bool cvt_un8 = false;
if (mode != NV50_BLIT_MODE_PASS &&
mode != NV50_BLIT_MODE_Z24X8 &&
mode != NV50_BLIT_MODE_X8Z24)
- tex_s = TRUE;
+ tex_s = true;
if (mode != NV50_BLIT_MODE_X24S8 &&
mode != NV50_BLIT_MODE_S8X24 &&
mode != NV50_BLIT_MODE_XS)
- tex_rgbaz = TRUE;
+ tex_rgbaz = true;
if (mode != NV50_BLIT_MODE_PASS &&
mode != NV50_BLIT_MODE_ZS &&
mode != NV50_BLIT_MODE_XS)
- cvt_un8 = TRUE;
+ cvt_un8 = true;
ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
if (!ureg)
@@ -1271,7 +1271,7 @@ nv50_blit_eng2d(struct nv50_context *nv50, const struct pipe_blit_info *info)
int i;
uint32_t mode;
uint32_t mask = nv50_blit_eng2d_get_mask(info);
- boolean b;
+ bool b;
mode = nv50_blit_get_filter(info) ?
NV50_2D_BLIT_CONTROL_FILTER_BILINEAR :
@@ -1410,7 +1410,7 @@ nv50_blit_eng2d(struct nv50_context *nv50, const struct pipe_blit_info *info)
PUSH_DATA (push, srcy >> 32);
}
}
- nv50_bufctx_fence(nv50->bufctx, FALSE);
+ nv50_bufctx_fence(nv50->bufctx, false);
nouveau_bufctx_reset(nv50->bufctx, NV50_BIND_2D);
@@ -1432,71 +1432,82 @@ static void
nv50_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
{
struct nv50_context *nv50 = nv50_context(pipe);
- boolean eng3d = FALSE;
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ bool eng3d = false;
if (util_format_is_depth_or_stencil(info->dst.resource->format)) {
if (!(info->mask & PIPE_MASK_ZS))
return;
if (info->dst.resource->format == PIPE_FORMAT_Z32_FLOAT ||
info->dst.resource->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
- eng3d = TRUE;
+ eng3d = true;
if (info->filter != PIPE_TEX_FILTER_NEAREST)
- eng3d = TRUE;
+ eng3d = true;
} else {
if (!(info->mask & PIPE_MASK_RGBA))
return;
if (info->mask != PIPE_MASK_RGBA)
- eng3d = TRUE;
+ eng3d = true;
}
if (nv50_miptree(info->src.resource)->layout_3d) {
- eng3d = TRUE;
+ eng3d = true;
} else
if (info->src.box.depth != info->dst.box.depth) {
- eng3d = TRUE;
+ eng3d = true;
debug_printf("blit: cannot filter array or cube textures in z direction");
}
if (!eng3d && info->dst.format != info->src.format) {
if (!nv50_2d_dst_format_faithful(info->dst.format) ||
!nv50_2d_src_format_faithful(info->src.format)) {
- eng3d = TRUE;
+ eng3d = true;
} else
if (!nv50_2d_src_format_faithful(info->src.format)) {
if (!util_format_is_luminance(info->src.format)) {
if (util_format_is_intensity(info->src.format))
- eng3d = TRUE;
+ eng3d = true;
else
if (!nv50_2d_dst_format_ops_supported(info->dst.format))
- eng3d = TRUE;
+ eng3d = true;
else
eng3d = !nv50_2d_format_supported(info->src.format);
}
} else
if (util_format_is_luminance_alpha(info->src.format))
- eng3d = TRUE;
+ eng3d = true;
}
if (info->src.resource->nr_samples == 8 &&
info->dst.resource->nr_samples <= 1)
- eng3d = TRUE;
+ eng3d = true;
/* FIXME: can't make this work with eng2d anymore */
if ((info->src.resource->nr_samples | 1) !=
(info->dst.resource->nr_samples | 1))
- eng3d = TRUE;
+ eng3d = true;
/* FIXME: find correct src coordinate adjustments */
if ((info->src.box.width != info->dst.box.width &&
info->src.box.width != -info->dst.box.width) ||
(info->src.box.height != info->dst.box.height &&
info->src.box.height != -info->dst.box.height))
- eng3d = TRUE;
+ eng3d = true;
+
+ if (nv50->screen->num_occlusion_queries_active) {
+ BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ }
if (!eng3d)
nv50_blit_eng2d(nv50, info);
else
nv50_blit_3d(nv50, info);
+
+ if (nv50->screen->num_occlusion_queries_active) {
+ BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
+ PUSH_DATA (push, 1);
+ }
}
static void
@@ -1505,13 +1516,13 @@ nv50_flush_resource(struct pipe_context *ctx,
{
}
-boolean
+bool
nv50_blitter_create(struct nv50_screen *screen)
{
screen->blitter = CALLOC_STRUCT(nv50_blitter);
if (!screen->blitter) {
NOUVEAU_ERR("failed to allocate blitter struct\n");
- return FALSE;
+ return false;
}
pipe_mutex_init(screen->blitter->mutex);
@@ -1519,7 +1530,7 @@ nv50_blitter_create(struct nv50_screen *screen)
nv50_blitter_make_vp(screen->blitter);
nv50_blitter_make_sampler(screen->blitter);
- return TRUE;
+ return true;
}
void
@@ -1542,20 +1553,20 @@ nv50_blitter_destroy(struct nv50_screen *screen)
FREE(blitter);
}
-boolean
+bool
nv50_blitctx_create(struct nv50_context *nv50)
{
nv50->blit = CALLOC_STRUCT(nv50_blitctx);
if (!nv50->blit) {
NOUVEAU_ERR("failed to allocate blit context\n");
- return FALSE;
+ return false;
}
nv50->blit->nv50 = nv50;
nv50->blit->rast.pipe.half_pixel_center = 1;
- return TRUE;
+ return true;
}
void
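A minimal model of the guard added to nv50_blit above: while occlusion queries are recording, pass counting is disabled around the blit so blit pixels don't pollute the results. samplecnt_enable here is a hypothetical stand-in for the SAMPLECNT_ENABLE method push.

#include <stdbool.h>
#include <stdio.h>

static void samplecnt_enable(bool on) { printf("SAMPLECNT_ENABLE = %d\n", on); }
static void do_blit(void)             { printf("blit\n"); }

int
main(void)
{
   int num_occlusion_queries_active = 1;

   if (num_occlusion_queries_active)
      samplecnt_enable(false);   /* pause counting */
   do_blit();
   if (num_occlusion_queries_active)
      samplecnt_enable(true);    /* resume for the still-active queries */
   return 0;
}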
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_tex.c b/src/gallium/drivers/nouveau/nv50/nv50_tex.c
index d69c8d6ff0d..fc6374d1b1b 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_tex.c
@@ -31,8 +31,8 @@
(NV50_TIC_0_MAPA__MASK | NV50_TIC_0_MAPB__MASK | \
NV50_TIC_0_MAPG__MASK | NV50_TIC_0_MAPR__MASK)
-static INLINE uint32_t
-nv50_tic_swizzle(uint32_t tc, unsigned swz, boolean tex_int)
+static inline uint32_t
+nv50_tic_swizzle(uint32_t tc, unsigned swz, bool tex_int)
{
switch (swz) {
case PIPE_SWIZZLE_RED:
@@ -71,6 +71,7 @@ nv50_create_texture_view(struct pipe_context *pipe,
uint32_t flags,
enum pipe_texture_target target)
{
+ const uint32_t class_3d = nouveau_context(pipe)->screen->class_3d;
const struct util_format_description *desc;
uint64_t addr;
uint32_t *tic;
@@ -78,7 +79,7 @@ nv50_create_texture_view(struct pipe_context *pipe,
uint32_t depth;
struct nv50_tic_entry *view;
struct nv50_miptree *mt = nv50_miptree(texture);
- boolean tex_int;
+ bool tex_int;
view = MALLOC_STRUCT(nv50_tic_entry);
if (!view)
@@ -192,7 +193,7 @@ nv50_create_texture_view(struct pipe_context *pipe,
break;
default:
NOUVEAU_ERR("invalid texture target: %d\n", mt->base.base.target);
- return FALSE;
+ return false;
}
tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000;
@@ -201,11 +202,17 @@ nv50_create_texture_view(struct pipe_context *pipe,
tic[5] = (mt->base.base.height0 << mt->ms_y) & 0xffff;
tic[5] |= depth << 16;
- tic[5] |= mt->base.base.last_level << NV50_TIC_5_LAST_LEVEL__SHIFT;
+ if (class_3d > NV50_3D_CLASS)
+ tic[5] |= mt->base.base.last_level << NV50_TIC_5_LAST_LEVEL__SHIFT;
+ else
+ tic[5] |= view->pipe.u.tex.last_level << NV50_TIC_5_LAST_LEVEL__SHIFT;
tic[6] = (mt->ms_x > 1) ? 0x88000000 : 0x03000000; /* sampling points */
- tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
+ if (class_3d > NV50_3D_CLASS)
+ tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
+ else
+ tic[7] = 0;
if (unlikely(!(tic[2] & NV50_TIC_2_NORMALIZED_COORDS)))
if (mt->base.base.last_level)
@@ -214,13 +221,13 @@ nv50_create_texture_view(struct pipe_context *pipe,
return &view->pipe;
}
-static boolean
+static bool
nv50_validate_tic(struct nv50_context *nv50, int s)
{
struct nouveau_pushbuf *push = nv50->base.pushbuf;
struct nouveau_bo *txc = nv50->screen->txc;
unsigned i;
- boolean need_flush = FALSE;
+ bool need_flush = false;
assert(nv50->num_textures[s] <= PIPE_MAX_SAMPLERS);
for (i = 0; i < nv50->num_textures[s]; ++i) {
@@ -263,7 +270,7 @@ nv50_validate_tic(struct nv50_context *nv50, int s)
BEGIN_NI04(push, NV50_2D(SIFC_DATA), 8);
PUSH_DATAp(push, &tic->tic[0], 8);
- need_flush = TRUE;
+ need_flush = true;
} else
if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1);
@@ -309,7 +316,7 @@ nv50_validate_tic(struct nv50_context *nv50, int s)
void nv50_validate_textures(struct nv50_context *nv50)
{
- boolean need_flush;
+ bool need_flush;
need_flush = nv50_validate_tic(nv50, 0);
need_flush |= nv50_validate_tic(nv50, 1);
@@ -321,12 +328,12 @@ void nv50_validate_textures(struct nv50_context *nv50)
}
}
-static boolean
+static bool
nv50_validate_tsc(struct nv50_context *nv50, int s)
{
struct nouveau_pushbuf *push = nv50->base.pushbuf;
unsigned i;
- boolean need_flush = FALSE;
+ bool need_flush = false;
assert(nv50->num_samplers[s] <= PIPE_MAX_SAMPLERS);
for (i = 0; i < nv50->num_samplers[s]; ++i) {
@@ -343,7 +350,7 @@ nv50_validate_tsc(struct nv50_context *nv50, int s)
nv50_sifc_linear_u8(&nv50->base, nv50->screen->txc,
65536 + tsc->id * 32,
NOUVEAU_BO_VRAM, 32, tsc->tsc);
- need_flush = TRUE;
+ need_flush = true;
}
nv50->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
@@ -361,7 +368,7 @@ nv50_validate_tsc(struct nv50_context *nv50, int s)
void nv50_validate_samplers(struct nv50_context *nv50)
{
- boolean need_flush;
+ bool need_flush;
need_flush = nv50_validate_tsc(nv50, 0);
need_flush |= nv50_validate_tsc(nv50, 1);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
index 1fd33b8aa59..6324726acec 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -58,7 +58,7 @@ nv50_vertex_state_create(struct pipe_context *pipe,
so->num_elements = num_elements;
so->instance_elts = 0;
so->instance_bufs = 0;
- so->need_conversion = FALSE;
+ so->need_conversion = false;
memset(so->vb_access_size, 0, sizeof(so->vb_access_size));
@@ -89,7 +89,7 @@ nv50_vertex_state_create(struct pipe_context *pipe,
return NULL;
}
so->element[i].state = nv50_format_table[fmt].vtx;
- so->need_conversion = TRUE;
+ so->need_conversion = true;
}
so->element[i].state |= i;
@@ -188,7 +188,7 @@ nv50_emit_vtxattr(struct nv50_context *nv50, struct pipe_vertex_buffer *vb,
}
}
-static INLINE void
+static inline void
nv50_user_vbuf_range(struct nv50_context *nv50, unsigned vbi,
uint32_t *base, uint32_t *size)
{
@@ -229,7 +229,7 @@ nv50_upload_user_buffers(struct nv50_context *nv50,
BCTX_REFN_bo(nv50->bufctx_3d, VERTEX_TMP, NOUVEAU_BO_GART |
NOUVEAU_BO_RD, bo);
}
- nv50->base.vbo_dirty = TRUE;
+ nv50->base.vbo_dirty = true;
}
static void
@@ -275,10 +275,10 @@ nv50_update_user_vbufs(struct nv50_context *nv50)
PUSH_DATAh(push, address[b] + ve->src_offset);
PUSH_DATA (push, address[b] + ve->src_offset);
}
- nv50->base.vbo_dirty = TRUE;
+ nv50->base.vbo_dirty = true;
}
-static INLINE void
+static inline void
nv50_release_user_vbufs(struct nv50_context *nv50)
{
if (nv50->vbo_user) {
@@ -316,7 +316,7 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
struct nv04_resource *buf = nv04_resource(nv50->vtxbuf[i].buffer);
if (buf && buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
buf->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
- nv50->base.vbo_dirty = TRUE;
+ nv50->base.vbo_dirty = true;
break;
}
}
@@ -382,6 +382,11 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
if (nv50->vbo_user & (1 << b)) {
address = addrs[b] + ve->pipe.src_offset;
limit = addrs[b] + limits[b];
+ } else
+ if (!vb->buffer) {
+ BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 1);
+ PUSH_DATA (push, 0);
+ continue;
} else {
struct nv04_resource *buf = nv04_resource(vb->buffer);
if (!(refd & (1 << b))) {
@@ -418,7 +423,7 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
#define NV50_PRIM_GL_CASE(n) \
case PIPE_PRIM_##n: return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
-static INLINE unsigned
+static inline unsigned
nv50_prim_gl(unsigned prim)
{
switch (prim) {
@@ -585,7 +590,7 @@ nv50_draw_elements_inline_u32_short(struct nouveau_pushbuf *push,
}
static void
-nv50_draw_elements(struct nv50_context *nv50, boolean shorten,
+nv50_draw_elements(struct nv50_context *nv50, bool shorten,
unsigned mode, unsigned start, unsigned count,
unsigned instance_count, int32_t index_bias)
{
@@ -746,9 +751,9 @@ nv50_draw_vbo_kick_notify(struct nouveau_pushbuf *chan)
{
struct nv50_screen *screen = chan->user_priv;
- nouveau_fence_update(&screen->base, TRUE);
+ nouveau_fence_update(&screen->base, true);
- nv50_bufctx_fence(screen->cur_ctx->bufctx_3d, TRUE);
+ nv50_bufctx_fence(screen->cur_ctx->bufctx_3d, true);
}
void
@@ -801,7 +806,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
continue;
if (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
- nv50->cb_dirty = TRUE;
+ nv50->cb_dirty = true;
}
}
@@ -809,7 +814,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
if (nv50->cb_dirty) {
BEGIN_NV04(push, NV50_3D(CODE_CB_FLUSH), 1);
PUSH_DATA (push, 0);
- nv50->cb_dirty = FALSE;
+ nv50->cb_dirty = false;
}
if (nv50->vbo_fifo) {
@@ -830,21 +835,21 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
if (!nv50->vtxbuf[i].buffer)
continue;
if (nv50->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
- nv50->base.vbo_dirty = TRUE;
+ nv50->base.vbo_dirty = true;
}
if (!nv50->base.vbo_dirty && nv50->idxbuf.buffer &&
nv50->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
- nv50->base.vbo_dirty = TRUE;
+ nv50->base.vbo_dirty = true;
if (nv50->base.vbo_dirty) {
BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1);
PUSH_DATA (push, 0);
- nv50->base.vbo_dirty = FALSE;
+ nv50->base.vbo_dirty = false;
}
if (info->indexed) {
- boolean shorten = info->max_index <= 65535;
+ bool shorten = info->max_index <= 65535;
if (info->primitive_restart != nv50->state.prim_restart) {
if (info->primitive_restart) {
@@ -853,7 +858,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
PUSH_DATA (push, info->restart_index);
if (info->restart_index > 65535)
- shorten = FALSE;
+ shorten = false;
} else {
BEGIN_NV04(push, NV50_3D(PRIM_RESTART_ENABLE), 1);
PUSH_DATA (push, 0);
@@ -865,7 +870,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
PUSH_DATA (push, info->restart_index);
if (info->restart_index > 65535)
- shorten = FALSE;
+ shorten = false;
}
nv50_draw_elements(nv50, shorten,
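The shorten flag threaded through nv50_draw_vbo above downgrades 32-bit index pushes to 16-bit when every index, including an enabled restart index, fits in 16 bits; the decision in isolation:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool
can_shorten(uint32_t max_index, bool prim_restart, uint32_t restart_index)
{
   bool shorten = max_index <= 65535;
   if (prim_restart && restart_index > 65535)
      shorten = false;           /* the restart marker must fit too */
   return shorten;
}

int
main(void)
{
   printf("%d\n", can_shorten(1000, false, 0));         /* 1 */
   printf("%d\n", can_shorten(1000, true, 0xffffffff)); /* 0 */
   return 0;
}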
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
index e8578c8be6f..76f1b41ea70 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
@@ -16,14 +16,14 @@
#endif
-static INLINE void
+static inline void
nv50_add_bufctx_resident_bo(struct nouveau_bufctx *bufctx, int bin,
unsigned flags, struct nouveau_bo *bo)
{
nouveau_bufctx_refn(bufctx, bin, bo, flags)->priv = NULL;
}
-static INLINE void
+static inline void
nv50_add_bufctx_resident(struct nouveau_bufctx *bufctx, int bin,
struct nv04_resource *res, unsigned flags)
{
@@ -39,7 +39,7 @@ nv50_add_bufctx_resident(struct nouveau_bufctx *bufctx, int bin,
#define BCTX_REFN(bctx, bin, res, acc) \
nv50_add_bufctx_resident(bctx, NV50_BIND_##bin, res, NOUVEAU_BO_##acc)
-static INLINE void
+static inline void
PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags)
{
struct nouveau_pushbuf_refn ref = { bo, flags };
@@ -61,39 +61,39 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags)
#define NV50_COMPUTE(n) SUBC_COMPUTE(NV50_COMPUTE_##n)
-static INLINE uint32_t
+static inline uint32_t
NV50_FIFO_PKHDR(int subc, int mthd, unsigned size)
{
return 0x00000000 | (size << 18) | (subc << 13) | mthd;
}
-static INLINE uint32_t
+static inline uint32_t
NV50_FIFO_PKHDR_NI(int subc, int mthd, unsigned size)
{
return 0x40000000 | (size << 18) | (subc << 13) | mthd;
}
-static INLINE uint32_t
+static inline uint32_t
NV50_FIFO_PKHDR_L(int subc, int mthd)
{
return 0x00030000 | (subc << 13) | mthd;
}
-static INLINE uint32_t
+static inline uint32_t
nouveau_bo_memtype(const struct nouveau_bo *bo)
{
return bo->config.nv50.memtype;
}
-static INLINE void
+static inline void
PUSH_DATAh(struct nouveau_pushbuf *push, uint64_t data)
{
*push->cur++ = (uint32_t)(data >> 32);
}
-static INLINE void
+static inline void
BEGIN_NV04(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
{
#ifndef NV50_PUSH_EXPLICIT_SPACE_CHECKING
@@ -102,7 +102,7 @@ BEGIN_NV04(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
PUSH_DATA (push, NV50_FIFO_PKHDR(subc, mthd, size));
}
-static INLINE void
+static inline void
BEGIN_NI04(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
{
#ifndef NV50_PUSH_EXPLICIT_SPACE_CHECKING
@@ -112,7 +112,7 @@ BEGIN_NI04(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
}
/* long, non-incremental, nv50-only */
-static INLINE void
+static inline void
BEGIN_NL50(struct nouveau_pushbuf *push, int subc, int mthd, uint32_t size)
{
#ifndef NV50_PUSH_EXPLICIT_SPACE_CHECKING
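The three PKHDR helpers above differ only in their high bits: 0x40000000 marks a non-incrementing method, and 0x00030000 the long, nv50-only form. A worked example of the field packing, using an illustrative subchannel/method pair rather than the driver's real assignments:

   #include <assert.h>
   #include <stdint.h>

   int main(void)
   {
      unsigned subc = 3, mthd = 0x1d6c, size = 1;
      /* size at bit 18, subchannel at bit 13, method in the low 13 bits */
      uint32_t hdr = ((uint32_t)size << 18) | (subc << 13) | mthd;
      assert(hdr == 0x00047d6c);
      return 0;
   }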
diff --git a/src/gallium/drivers/nouveau/nv50/nv84_video.h b/src/gallium/drivers/nouveau/nv50/nv84_video.h
index 2edba389dbf..09773c12974 100644
--- a/src/gallium/drivers/nouveau/nv50/nv84_video.h
+++ b/src/gallium/drivers/nouveau/nv50/nv84_video.h
@@ -102,12 +102,12 @@ struct nv84_decoder {
uint8_t mpeg12_non_intra_matrix[64];
};
-static INLINE uint32_t mb(uint32_t coord)
+static inline uint32_t mb(uint32_t coord)
{
return (coord + 0xf)>>4;
}
-static INLINE uint32_t mb_half(uint32_t coord)
+static inline uint32_t mb_half(uint32_t coord)
{
return (coord + 0x1f)>>5;
}
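mb() and mb_half() round a pixel coordinate up to whole 16-pixel macroblocks (32 pixels at half resolution). A quick self-check of the arithmetic:

   #include <assert.h>
   #include <stdint.h>

   static uint32_t mb(uint32_t coord)      { return (coord + 0xf) >> 4; }
   static uint32_t mb_half(uint32_t coord) { return (coord + 0x1f) >> 5; }

   int main(void)
   {
      assert(mb(1920) == 120);     /* exact multiple of 16 */
      assert(mb(1080) == 68);      /* 67.5 macroblocks rounds up */
      assert(mb_half(1080) == 34); /* 33.75 rounds up */
      return 0;
   }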
diff --git a/src/gallium/drivers/nouveau/nv50/nv84_video_vp.c b/src/gallium/drivers/nouveau/nv50/nv84_video_vp.c
index f3480b2e00e..8b121477a37 100644
--- a/src/gallium/drivers/nouveau/nv50/nv84_video_vp.c
+++ b/src/gallium/drivers/nouveau/nv50/nv84_video_vp.c
@@ -221,7 +221,7 @@ nv84_decoder_vp_h264(struct nv84_decoder *dec,
PUSH_KICK (push);
}
-static INLINE int16_t inverse_quantize(int16_t val, uint8_t quant, int mpeg1) {
+static inline int16_t inverse_quantize(int16_t val, uint8_t quant, int mpeg1) {
int16_t ret = val * quant / 16;
if (mpeg1 && ret) {
if (ret > 0)
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
index 56fc83d3679..47bd123621b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
@@ -121,51 +121,51 @@ nvc0_screen_compute_setup(struct nvc0_screen *screen,
return 0;
}
-boolean
+bool
nvc0_compute_validate_program(struct nvc0_context *nvc0)
{
struct nvc0_program *prog = nvc0->compprog;
if (prog->mem)
- return TRUE;
+ return true;
if (!prog->translated) {
prog->translated = nvc0_program_translate(
prog, nvc0->screen->base.device->chipset);
if (!prog->translated)
- return FALSE;
+ return false;
}
if (unlikely(!prog->code_size))
- return FALSE;
+ return false;
if (likely(prog->code_size)) {
if (nvc0_program_upload_code(nvc0, prog)) {
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CODE);
- return TRUE;
+ return true;
}
}
- return FALSE;
+ return false;
}
-static boolean
+static bool
nvc0_compute_state_validate(struct nvc0_context *nvc0)
{
if (!nvc0_compute_validate_program(nvc0))
- return FALSE;
+ return false;
/* TODO: textures, samplers, surfaces, global memory buffers */
- nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, FALSE);
+ nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, false);
nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_cp);
if (unlikely(nouveau_pushbuf_validate(nvc0->base.pushbuf)))
- return FALSE;
+ return false;
if (unlikely(nvc0->state.flushed))
- nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, TRUE);
+ nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, true);
- return TRUE;
+ return true;
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h
index 9a1a71760d7..168a6d1bee2 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h
@@ -4,7 +4,7 @@
#include "nv50/nv50_defs.xml.h"
#include "nvc0/nvc0_compute.xml.h"
-boolean
+bool
nvc0_compute_validate_program(struct nvc0_context *nvc0);
#endif /* NVC0_COMPUTE_H */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
index a35c3f66142..84f8db6a8ac 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
@@ -63,12 +63,12 @@ nvc0_memory_barrier(struct pipe_context *pipe, unsigned flags)
if (!nvc0->vtxbuf[i].buffer)
continue;
if (nvc0->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT)
- nvc0->base.vbo_dirty = TRUE;
+ nvc0->base.vbo_dirty = true;
}
if (nvc0->idxbuf.buffer &&
nvc0->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT)
- nvc0->base.vbo_dirty = TRUE;
+ nvc0->base.vbo_dirty = true;
for (s = 0; s < 5 && !nvc0->cb_dirty; ++s) {
uint32_t valid = nvc0->constbuf_valid[s];
@@ -86,7 +86,7 @@ nvc0_memory_barrier(struct pipe_context *pipe, unsigned flags)
continue;
if (res->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT)
- nvc0->cb_dirty = TRUE;
+ nvc0->cb_dirty = true;
}
}
}
@@ -164,9 +164,9 @@ nvc0_default_kick_notify(struct nouveau_pushbuf *push)
if (screen) {
nouveau_fence_next(&screen->base);
- nouveau_fence_update(&screen->base, TRUE);
+ nouveau_fence_update(&screen->base, true);
if (screen->cur_ctx)
- screen->cur_ctx->state.flushed = TRUE;
+ screen->cur_ctx->state.flushed = true;
NOUVEAU_DRV_STAT(&screen->base, pushbuf_count, 1);
}
}
@@ -378,7 +378,7 @@ out_err:
void
nvc0_bufctx_fence(struct nvc0_context *nvc0, struct nouveau_bufctx *bufctx,
- boolean on_flush)
+ bool on_flush)
{
struct nouveau_list *list = on_flush ? &bufctx->current : &bufctx->pending;
struct nouveau_list *it;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index a8d7593b398..f4499423a10 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -54,6 +54,7 @@
#define NVC0_NEW_IDXBUF (1 << 22)
#define NVC0_NEW_SURFACES (1 << 23)
#define NVC0_NEW_MIN_SAMPLES (1 << 24)
+#define NVC0_NEW_TESSFACTOR (1 << 25)
#define NVC0_NEW_CP_PROGRAM (1 << 0)
#define NVC0_NEW_CP_SURFACES (1 << 1)
@@ -93,7 +94,7 @@
struct nvc0_blitctx;
-boolean nvc0_blitctx_create(struct nvc0_context *);
+bool nvc0_blitctx_create(struct nvc0_context *);
void nvc0_blitctx_destroy(struct nvc0_context *);
struct nvc0_context {
@@ -130,7 +131,7 @@ struct nvc0_context {
struct nvc0_constbuf constbuf[6][NVC0_MAX_PIPE_CONSTBUFS];
uint16_t constbuf_dirty[6];
uint16_t constbuf_valid[6];
- boolean cb_dirty;
+ bool cb_dirty;
struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
unsigned num_vtxbufs;
@@ -164,14 +165,17 @@ struct nvc0_context {
unsigned sample_mask;
unsigned min_samples;
- boolean vbo_push_hint;
+ float default_tess_outer[4];
+ float default_tess_inner[2];
+
+ bool vbo_push_hint;
uint8_t tfbbuf_dirty;
struct pipe_stream_output_target *tfbbuf[4];
unsigned num_tfbbufs;
struct pipe_query *cond_query;
- boolean cond_cond; /* inverted rendering condition */
+ bool cond_cond; /* inverted rendering condition */
uint cond_mode;
uint32_t cond_condmode; /* the calculated condition */
@@ -184,19 +188,19 @@ struct nvc0_context {
struct util_dynarray global_residents;
};
-static INLINE struct nvc0_context *
+static inline struct nvc0_context *
nvc0_context(struct pipe_context *pipe)
{
return (struct nvc0_context *)pipe;
}
-static INLINE unsigned
+static inline unsigned
nvc0_shader_stage(unsigned pipe)
{
switch (pipe) {
case PIPE_SHADER_VERTEX: return 0;
-/* case PIPE_SHADER_TESSELLATION_CONTROL: return 1; */
-/* case PIPE_SHADER_TESSELLATION_EVALUATION: return 2; */
+ case PIPE_SHADER_TESS_CTRL: return 1;
+ case PIPE_SHADER_TESS_EVAL: return 2;
case PIPE_SHADER_GEOMETRY: return 3;
case PIPE_SHADER_FRAGMENT: return 4;
case PIPE_SHADER_COMPUTE: return 5;
@@ -210,15 +214,15 @@ nvc0_shader_stage(unsigned pipe)
/* nvc0_context.c */
struct pipe_context *nvc0_create(struct pipe_screen *, void *);
void nvc0_bufctx_fence(struct nvc0_context *, struct nouveau_bufctx *,
- boolean on_flush);
+ bool on_flush);
void nvc0_default_kick_notify(struct nouveau_pushbuf *);
/* nvc0_draw.c */
extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *);
/* nvc0_program.c */
-boolean nvc0_program_translate(struct nvc0_program *, uint16_t chipset);
-boolean nvc0_program_upload_code(struct nvc0_context *, struct nvc0_program *);
+bool nvc0_program_translate(struct nvc0_program *, uint16_t chipset);
+bool nvc0_program_upload_code(struct nvc0_context *, struct nvc0_program *);
void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *);
void nvc0_program_library_upload(struct nvc0_context *);
uint32_t nvc0_program_symbol_offset(const struct nvc0_program *,
@@ -231,7 +235,7 @@ void nvc0_query_pushbuf_submit(struct nouveau_pushbuf *,
void nvc0_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *);
void nvc0_so_target_save_offset(struct pipe_context *,
struct pipe_stream_output_target *, unsigned i,
- boolean *serialize);
+ bool *serialize);
#define NVC0_QUERY_TFB_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0)
@@ -250,8 +254,8 @@ extern void nvc0_init_state_functions(struct nvc0_context *);
/* nvc0_state_validate.c */
void nvc0_validate_global_residents(struct nvc0_context *,
struct nouveau_bufctx *, int bin);
-extern boolean nvc0_state_validate(struct nvc0_context *, uint32_t state_mask,
- unsigned space_words);
+extern bool nvc0_state_validate(struct nvc0_context *, uint32_t state_mask,
+ unsigned space_words);
/* nvc0_surface.c */
extern void nvc0_clear(struct pipe_context *, unsigned buffers,
@@ -260,7 +264,7 @@ extern void nvc0_clear(struct pipe_context *, unsigned buffers,
extern void nvc0_init_surface_functions(struct nvc0_context *);
/* nvc0_tex.c */
-boolean nve4_validate_tsc(struct nvc0_context *nvc0, int s);
+bool nve4_validate_tsc(struct nvc0_context *nvc0, int s);
void nvc0_validate_textures(struct nvc0_context *);
void nvc0_validate_samplers(struct nvc0_context *);
void nve4_set_tex_handles(struct nvc0_context *);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
index 3875bbf4ca4..15991c3d2bd 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
@@ -29,13 +29,13 @@
#include "nvc0/nvc0_resource.h"
static uint32_t
-nvc0_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz, boolean is_3d)
+nvc0_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz, bool is_3d)
{
return nv50_tex_choose_tile_dims_helper(nx, ny, nz, is_3d);
}
static uint32_t
-nvc0_mt_choose_storage_type(struct nv50_miptree *mt, boolean compressed)
+nvc0_mt_choose_storage_type(struct nv50_miptree *mt, bool compressed)
{
const unsigned ms = util_logbase2(mt->base.base.nr_samples);
@@ -133,7 +133,7 @@ nvc0_mt_choose_storage_type(struct nv50_miptree *mt, boolean compressed)
return tile_flags;
}
-static INLINE boolean
+static inline bool
nvc0_miptree_init_ms_mode(struct nv50_miptree *mt)
{
switch (mt->base.base.nr_samples) {
@@ -157,9 +157,9 @@ nvc0_miptree_init_ms_mode(struct nv50_miptree *mt)
break;
default:
NOUVEAU_ERR("invalid nr_samples: %u\n", mt->base.base.nr_samples);
- return FALSE;
+ return false;
}
- return TRUE;
+ return true;
}
static void
@@ -250,7 +250,7 @@ nvc0_miptree_create(struct pipe_screen *pscreen,
struct nouveau_device *dev = nouveau_screen(pscreen)->device;
struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree);
struct pipe_resource *pt = &mt->base.base;
- boolean compressed = dev->drm_version >= 0x01000101;
+ bool compressed = dev->drm_version >= 0x01000101;
int ret;
union nouveau_bo_config bo_config;
uint32_t bo_flags;
@@ -325,7 +325,7 @@ nvc0_miptree_create(struct pipe_screen *pscreen,
}
/* Offset of zslice @z from start of level @l. */
-INLINE unsigned
+inline unsigned
nvc0_mt_zslice_offset(const struct nv50_miptree *mt, unsigned l, unsigned z)
{
const struct pipe_resource *pt = &mt->base.base;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index e1f5a8c4416..507a2507fe3 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -31,24 +31,25 @@
* 124 scalar varying values.
*/
static uint32_t
-nvc0_shader_input_address(unsigned sn, unsigned si, unsigned ubase)
+nvc0_shader_input_address(unsigned sn, unsigned si)
{
switch (sn) {
- case NV50_SEMANTIC_TESSFACTOR: return 0x000 + si * 0x4;
+ case TGSI_SEMANTIC_TESSOUTER: return 0x000 + si * 0x4;
+ case TGSI_SEMANTIC_TESSINNER: return 0x010 + si * 0x4;
+ case TGSI_SEMANTIC_PATCH: return 0x020 + si * 0x10;
case TGSI_SEMANTIC_PRIMID: return 0x060;
case TGSI_SEMANTIC_LAYER: return 0x064;
case TGSI_SEMANTIC_VIEWPORT_INDEX:return 0x068;
case TGSI_SEMANTIC_PSIZE: return 0x06c;
case TGSI_SEMANTIC_POSITION: return 0x070;
- case TGSI_SEMANTIC_GENERIC: return ubase + si * 0x10;
+ case TGSI_SEMANTIC_GENERIC: return 0x080 + si * 0x10;
case TGSI_SEMANTIC_FOG: return 0x2e8;
case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10;
case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10;
- case NV50_SEMANTIC_CLIPDISTANCE: return 0x2c0 + si * 0x4;
case TGSI_SEMANTIC_CLIPDIST: return 0x2c0 + si * 0x10;
case TGSI_SEMANTIC_CLIPVERTEX: return 0x270;
case TGSI_SEMANTIC_PCOORD: return 0x2e0;
- case NV50_SEMANTIC_TESSCOORD: return 0x2f0;
+ case TGSI_SEMANTIC_TESSCOORD: return 0x2f0;
case TGSI_SEMANTIC_INSTANCEID: return 0x2f8;
case TGSI_SEMANTIC_VERTEXID: return 0x2fc;
case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10;
@@ -60,20 +61,21 @@ nvc0_shader_input_address(unsigned sn, unsigned si, unsigned ubase)
}
static uint32_t
-nvc0_shader_output_address(unsigned sn, unsigned si, unsigned ubase)
+nvc0_shader_output_address(unsigned sn, unsigned si)
{
switch (sn) {
- case NV50_SEMANTIC_TESSFACTOR: return 0x000 + si * 0x4;
+ case TGSI_SEMANTIC_TESSOUTER: return 0x000 + si * 0x4;
+ case TGSI_SEMANTIC_TESSINNER: return 0x010 + si * 0x4;
+ case TGSI_SEMANTIC_PATCH: return 0x020 + si * 0x10;
case TGSI_SEMANTIC_PRIMID: return 0x060;
case TGSI_SEMANTIC_LAYER: return 0x064;
case TGSI_SEMANTIC_VIEWPORT_INDEX:return 0x068;
case TGSI_SEMANTIC_PSIZE: return 0x06c;
case TGSI_SEMANTIC_POSITION: return 0x070;
- case TGSI_SEMANTIC_GENERIC: return ubase + si * 0x10;
+ case TGSI_SEMANTIC_GENERIC: return 0x080 + si * 0x10;
case TGSI_SEMANTIC_FOG: return 0x2e8;
case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10;
case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10;
- case NV50_SEMANTIC_CLIPDISTANCE: return 0x2c0 + si * 0x4;
case TGSI_SEMANTIC_CLIPDIST: return 0x2c0 + si * 0x10;
case TGSI_SEMANTIC_CLIPVERTEX: return 0x270;
case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10;
@@ -95,7 +97,7 @@ nvc0_vp_assign_input_slots(struct nv50_ir_prog_info *info)
case TGSI_SEMANTIC_VERTEXID:
info->in[i].mask = 0x1;
info->in[i].slot[0] =
- nvc0_shader_input_address(info->in[i].sn, 0, 0) / 4;
+ nvc0_shader_input_address(info->in[i].sn, 0) / 4;
continue;
default:
break;
@@ -111,18 +113,11 @@ nvc0_vp_assign_input_slots(struct nv50_ir_prog_info *info)
static int
nvc0_sp_assign_input_slots(struct nv50_ir_prog_info *info)
{
- unsigned ubase = MAX2(0x80, 0x20 + info->numPatchConstants * 0x10);
unsigned offset;
unsigned i, c;
for (i = 0; i < info->numInputs; ++i) {
- offset = nvc0_shader_input_address(info->in[i].sn,
- info->in[i].si, ubase);
- if (info->in[i].patch && offset >= 0x20)
- offset = 0x20 + info->in[i].si * 0x10;
-
- if (info->in[i].sn == NV50_SEMANTIC_TESSCOORD)
- info->in[i].mask &= 3;
+ offset = nvc0_shader_input_address(info->in[i].sn, info->in[i].si);
for (c = 0; c < 4; ++c)
info->in[i].slot[c] = (offset + c * 0x4) / 4;
@@ -157,15 +152,11 @@ nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info)
static int
nvc0_sp_assign_output_slots(struct nv50_ir_prog_info *info)
{
- unsigned ubase = MAX2(0x80, 0x20 + info->numPatchConstants * 0x10);
unsigned offset;
unsigned i, c;
for (i = 0; i < info->numOutputs; ++i) {
- offset = nvc0_shader_output_address(info->out[i].sn,
- info->out[i].si, ubase);
- if (info->out[i].patch && offset >= 0x20)
- offset = 0x20 + info->out[i].si * 0x10;
+ offset = nvc0_shader_output_address(info->out[i].sn, info->out[i].si);
for (c = 0; c < 4; ++c)
info->out[i].slot[c] = (offset + c * 0x4) / 4;
@@ -193,7 +184,7 @@ nvc0_program_assign_varying_slots(struct nv50_ir_prog_info *info)
return ret;
}
-static INLINE void
+static inline void
nvc0_vtgp_hdr_update_oread(struct nvc0_program *vp, uint8_t slot)
{
uint8_t min = (vp->hdr[4] >> 12) & 0xff;
@@ -216,12 +207,8 @@ nvc0_vtgp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info)
continue;
for (c = 0; c < 4; ++c) {
a = info->in[i].slot[c];
- if (info->in[i].mask & (1 << c)) {
- if (info->in[i].sn != NV50_SEMANTIC_TESSCOORD)
- vp->hdr[5 + a / 32] |= 1 << (a % 32);
- else
- nvc0_vtgp_hdr_update_oread(vp, info->in[i].slot[c]);
- }
+ if (info->in[i].mask & (1 << c))
+ vp->hdr[5 + a / 32] |= 1 << (a % 32);
}
}
@@ -250,6 +237,14 @@ nvc0_vtgp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info)
case TGSI_SEMANTIC_VERTEXID:
vp->hdr[10] |= 1 << 31;
break;
+ case TGSI_SEMANTIC_TESSCOORD:
+ /* We don't have the mask, nor the slots populated. While this could
+ * be achieved, in the vast majority of cases, if either of the coords
+ * is read, then both will be read.
+ */
+ nvc0_vtgp_hdr_update_oread(vp, 0x2f0 / 4);
+ nvc0_vtgp_hdr_update_oread(vp, 0x2f4 / 4);
+ break;
default:
break;
}
@@ -277,7 +272,6 @@ nvc0_vp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info)
return nvc0_vtgp_gen_header(vp, info);
}
-#if defined(PIPE_SHADER_HULL) || defined(PIPE_SHADER_DOMAIN)
static void
nvc0_tp_get_tess_mode(struct nvc0_program *tp, struct nv50_ir_prog_info *info)
{
@@ -305,14 +299,13 @@ nvc0_tp_get_tess_mode(struct nvc0_program *tp, struct nv50_ir_prog_info *info)
tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CONNECTED;
switch (info->prop.tp.partitioning) {
- case PIPE_TESS_PART_INTEGER:
- case PIPE_TESS_PART_POW2:
+ case PIPE_TESS_SPACING_EQUAL:
tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_EQUAL;
break;
- case PIPE_TESS_PART_FRACT_ODD:
+ case PIPE_TESS_SPACING_FRACTIONAL_ODD:
tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_ODD;
break;
- case PIPE_TESS_PART_FRACT_EVEN:
+ case PIPE_TESS_SPACING_FRACTIONAL_EVEN:
tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_EVEN;
break;
default:
@@ -320,9 +313,7 @@ nvc0_tp_get_tess_mode(struct nvc0_program *tp, struct nv50_ir_prog_info *info)
break;
}
}
-#endif
-#ifdef PIPE_SHADER_HULL
static int
nvc0_tcp_gen_header(struct nvc0_program *tcp, struct nv50_ir_prog_info *info)
{
@@ -346,9 +337,7 @@ nvc0_tcp_gen_header(struct nvc0_program *tcp, struct nv50_ir_prog_info *info)
return 0;
}
-#endif
-#ifdef PIPE_SHADER_DOMAIN
static int
nvc0_tep_gen_header(struct nvc0_program *tep, struct nv50_ir_prog_info *info)
{
@@ -365,7 +354,6 @@ nvc0_tep_gen_header(struct nvc0_program *tep, struct nv50_ir_prog_info *info)
return 0;
}
-#endif
static int
nvc0_gp_gen_header(struct nvc0_program *gp, struct nv50_ir_prog_info *info)
@@ -523,7 +511,7 @@ nvc0_program_dump(struct nvc0_program *prog)
}
#endif
-boolean
+bool
nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
{
struct nv50_ir_prog_info *info;
@@ -531,7 +519,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
info = CALLOC_STRUCT(nv50_ir_prog_info);
if (!info)
- return FALSE;
+ return false;
info->type = prog->type;
info->target = chipset;
@@ -598,16 +586,12 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
case PIPE_SHADER_VERTEX:
ret = nvc0_vp_gen_header(prog, info);
break;
-#ifdef PIPE_SHADER_HULL
- case PIPE_SHADER_HULL:
+ case PIPE_SHADER_TESS_CTRL:
ret = nvc0_tcp_gen_header(prog, info);
break;
-#endif
-#ifdef PIPE_SHADER_DOMAIN
- case PIPE_SHADER_DOMAIN:
+ case PIPE_SHADER_TESS_EVAL:
ret = nvc0_tep_gen_header(prog, info);
break;
-#endif
case PIPE_SHADER_GEOMETRY:
ret = nvc0_gp_gen_header(prog, info);
break;
@@ -630,7 +614,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
assert(info->bin.tlsSpace < (1 << 24));
prog->hdr[0] |= 1 << 26;
prog->hdr[1] |= align(info->bin.tlsSpace, 0x10); /* l[] size */
- prog->need_tls = TRUE;
+ prog->need_tls = true;
}
/* TODO: factor 2 only needed where joinat/precont is used,
* and we only have to count non-uniform branches
@@ -638,7 +622,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
/*
if ((info->maxCFDepth * 2) > 16) {
prog->hdr[2] |= (((info->maxCFDepth * 2) + 47) / 48) * 0x200;
- prog->need_tls = TRUE;
+ prog->need_tls = true;
}
*/
if (info->io.globalAccess)
@@ -655,11 +639,11 @@ out:
return !ret;
}
-boolean
+bool
nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
{
struct nvc0_screen *screen = nvc0->screen;
- const boolean is_cp = prog->type == PIPE_SHADER_COMPUTE;
+ const bool is_cp = prog->type == PIPE_SHADER_COMPUTE;
int ret;
uint32_t size = prog->code_size + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE);
uint32_t lib_pos = screen->lib_code->start;
@@ -694,7 +678,7 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
if (ret) {
NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size);
- return FALSE;
+ return false;
}
IMMED_NVC0(nvc0->base.pushbuf, NVC0_3D(SERIALIZE), 0);
}
@@ -729,7 +713,7 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
nv50_ir_relocate_code(prog->relocs, prog->code, code_pos, lib_pos, 0);
#ifdef DEBUG
- if (debug_get_bool_option("NV50_PROG_DEBUG", FALSE))
+ if (debug_get_bool_option("NV50_PROG_DEBUG", false))
nvc0_program_dump(prog);
#endif
@@ -746,7 +730,7 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(MEM_BARRIER), 1);
PUSH_DATA (nvc0->base.pushbuf, 0x1011);
- return TRUE;
+ return true;
}
/* Upload code for builtin functions like integer division emulation. */
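With ubase gone, the varying map is fully static: tess levels at 0x000/0x010, patch constants from 0x020, generics from 0x080. A worked example of the slot arithmetic for a GENERIC input with semantic index 2, mirroring the loop in nvc0_sp_assign_input_slots (illustration only):

   #include <assert.h>
   #include <stdint.h>

   int main(void)
   {
      unsigned si = 2, c;
      uint32_t offset = 0x080 + si * 0x10;  /* TGSI_SEMANTIC_GENERIC case */
      assert(offset == 0xa0);
      for (c = 0; c < 4; ++c)               /* slot[c] = (offset + c*4)/4 */
         assert((offset + c * 0x4) / 4 == 40 + c);
      return 0;
   }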
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
index 3fd9d21b4c4..390e0c7a4f0 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
@@ -21,8 +21,8 @@ struct nvc0_program {
struct pipe_shader_state pipe;
ubyte type;
- boolean translated;
- boolean need_tls;
+ bool translated;
+ bool need_tls;
uint8_t num_gprs;
uint32_t *code;
@@ -41,7 +41,7 @@ struct nvc0_program {
uint8_t clip_enable; /* mask of defined clip planes */
uint8_t num_ucps; /* also set to max if ClipDistance is used */
uint8_t edgeflag; /* attribute index of edgeflag input */
- boolean need_vertex_id;
+ bool need_vertex_id;
} vp;
struct {
uint8_t early_z;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index aea6cbda02d..f7b85a8e931 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -44,7 +44,7 @@ struct nvc0_query {
uint32_t base;
uint32_t offset; /* base + i * rotate */
uint8_t state;
- boolean is64bit;
+ bool is64bit;
uint8_t rotate;
int nesting; /* only used for occlusion queries */
union {
@@ -62,13 +62,13 @@ static void nvc0_mp_pm_query_end(struct nvc0_context *, struct nvc0_query *);
static boolean nvc0_mp_pm_query_result(struct nvc0_context *,
struct nvc0_query *, void *, boolean);
-static INLINE struct nvc0_query *
+static inline struct nvc0_query *
nvc0_query(struct pipe_query *pipe)
{
return (struct nvc0_query *)pipe;
}
-static boolean
+static bool
nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size)
{
struct nvc0_screen *screen = nvc0->screen;
@@ -87,17 +87,17 @@ nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size)
if (size) {
q->u.mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base);
if (!q->bo)
- return FALSE;
+ return false;
q->offset = q->base;
ret = nouveau_bo_map(q->bo, 0, screen->base.client);
if (ret) {
nvc0_query_allocate(nvc0, q, 0);
- return FALSE;
+ return false;
}
q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base);
}
- return TRUE;
+ return true;
}
static void
@@ -126,17 +126,17 @@ nvc0_query_create(struct pipe_context *pipe, unsigned type, unsigned index)
space = NVC0_QUERY_ALLOC_SPACE;
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
- q->is64bit = TRUE;
+ q->is64bit = true;
space = 512;
break;
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
- q->is64bit = TRUE;
+ q->is64bit = true;
space = 64;
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_PRIMITIVES_EMITTED:
- q->is64bit = TRUE;
+ q->is64bit = true;
q->index = index;
space = 32;
break;
@@ -257,11 +257,11 @@ nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
struct nvc0_context *nvc0 = nvc0_context(pipe);
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nvc0_query *q = nvc0_query(pq);
- boolean ret = true;
+ bool ret = true;
/* For occlusion queries we have to change the storage, because a previous
- * query might set the initial render conition to FALSE even *after* we re-
- * initialized it to TRUE.
+ * query might set the initial render condition to false even *after* we re-
+ * initialized it to true.
*/
if (q->rotate) {
nvc0_query_rotate(nvc0, q);
@@ -270,7 +270,7 @@ nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
* query ?
*/
q->data[0] = q->sequence; /* initialize sequence */
- q->data[1] = 1; /* initial render condition = TRUE */
+ q->data[1] = 1; /* initial render condition = true */
q->data[4] = q->sequence + 1; /* for comparison COND_MODE */
q->data[5] = 0;
}
@@ -401,7 +401,7 @@ nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
nvc0_query_get(push, q, 0x00, 0x0d005002 | (q->index << 5));
break;
case PIPE_QUERY_TIMESTAMP_DISJOINT:
- /* This query is not issued on GPU because disjoint is forced to FALSE */
+ /* This query is not issued on GPU because disjoint is forced to false */
q->state = NVC0_QUERY_STATE_READY;
break;
default:
@@ -422,7 +422,7 @@ nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
nouveau_fence_ref(nvc0->screen->base.fence.current, &q->fence);
}
-static INLINE void
+static inline void
nvc0_query_update(struct nouveau_client *cli, struct nvc0_query *q)
{
if (q->is64bit) {
@@ -442,7 +442,7 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
struct nvc0_query *q = nvc0_query(pq);
uint64_t *res64 = (uint64_t*)result;
uint32_t *res32 = (uint32_t*)result;
- boolean *res8 = (boolean*)result;
+ uint8_t *res8 = (uint8_t*)result;
uint64_t *data64 = (uint64_t *)q->data;
unsigned i;
@@ -450,7 +450,7 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
if (q->type >= NVC0_QUERY_DRV_STAT(0) &&
q->type <= NVC0_QUERY_DRV_STAT_LAST) {
res64[0] = q->u.value;
- return TRUE;
+ return true;
} else
#endif
if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
@@ -468,17 +468,17 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
/* flush for silly apps that spin on GL_QUERY_RESULT_AVAILABLE */
PUSH_KICK(nvc0->base.pushbuf);
}
- return FALSE;
+ return false;
}
if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->screen->base.client))
- return FALSE;
+ return false;
NOUVEAU_DRV_STAT(&nvc0->screen->base, query_sync_count, 1);
}
q->state = NVC0_QUERY_STATE_READY;
switch (q->type) {
case PIPE_QUERY_GPU_FINISHED:
- res8[0] = TRUE;
+ res8[0] = true;
break;
case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
res64[0] = q->data[1] - q->data[5];
@@ -502,7 +502,7 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
break;
case PIPE_QUERY_TIMESTAMP_DISJOINT:
res64[0] = 1000000000;
- res8[8] = FALSE;
+ res8[8] = false;
break;
case PIPE_QUERY_TIME_ELAPSED:
res64[0] = data64[1] - data64[3];
@@ -516,10 +516,10 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
break;
default:
assert(0); /* can't happen, we don't create queries with invalid type */
- return FALSE;
+ return false;
}
- return TRUE;
+ return true;
}
void
@@ -549,7 +549,7 @@ nvc0_render_condition(struct pipe_context *pipe,
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nvc0_query *q;
uint32_t cond;
- boolean wait =
+ bool wait =
mode != PIPE_RENDER_COND_NO_WAIT &&
mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;
@@ -563,7 +563,7 @@ nvc0_render_condition(struct pipe_context *pipe,
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
cond = condition ? NVC0_3D_COND_MODE_EQUAL :
NVC0_3D_COND_MODE_NOT_EQUAL;
- wait = TRUE;
+ wait = true;
break;
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
@@ -626,12 +626,12 @@ nvc0_query_pushbuf_submit(struct nouveau_pushbuf *push,
void
nvc0_so_target_save_offset(struct pipe_context *pipe,
struct pipe_stream_output_target *ptarg,
- unsigned index, boolean *serialize)
+ unsigned index, bool *serialize)
{
struct nvc0_so_target *targ = nvc0_so_target(ptarg);
if (*serialize) {
- *serialize = FALSE;
+ *serialize = false;
PUSH_SPACE(nvc0_context(pipe)->base.pushbuf, 1);
IMMED_NVC0(nvc0_context(pipe)->base.pushbuf, NVC0_3D(SERIALIZE), 0);
@@ -1080,7 +1080,7 @@ nvc0_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q)
{
struct nvc0_screen *screen = nvc0->screen;
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
- const boolean is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
+ const bool is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
const struct nvc0_mp_pm_query_cfg *cfg;
unsigned i, c;
unsigned num_ab[2] = { 0, 0 };
@@ -1101,7 +1101,7 @@ nvc0_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q)
PUSH_SPACE(push, 4 * 8 * (is_nve4 ? 1 : 6) + 6);
if (!screen->pm.mp_counters_enabled) {
- screen->pm.mp_counters_enabled = TRUE;
+ screen->pm.mp_counters_enabled = true;
BEGIN_NVC0(push, SUBC_SW(0x06ac), 1);
PUSH_DATA (push, 0x1fcb);
}
@@ -1168,7 +1168,7 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
struct nvc0_screen *screen = nvc0->screen;
struct pipe_context *pipe = &nvc0->base.pipe;
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
- const boolean is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
+ const bool is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
uint32_t mask;
uint32_t input[3];
const uint block[3] = { 32, is_nve4 ? 4 : 1, 1 };
@@ -1181,7 +1181,7 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
if (unlikely(!screen->pm.prog)) {
struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program);
prog->type = PIPE_SHADER_COMPUTE;
- prog->translated = TRUE;
+ prog->translated = true;
prog->num_gprs = 14;
prog->parm_size = 12;
if (is_nve4) {
@@ -1249,9 +1249,9 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
}
}
-static INLINE boolean
+static inline bool
nvc0_mp_pm_query_read_data(uint32_t count[32][4],
- struct nvc0_context *nvc0, boolean wait,
+ struct nvc0_context *nvc0, bool wait,
struct nvc0_query *q,
const struct nvc0_mp_pm_query_cfg *cfg,
unsigned mp_count)
@@ -1264,19 +1264,19 @@ nvc0_mp_pm_query_read_data(uint32_t count[32][4],
for (c = 0; c < cfg->num_counters; ++c) {
if (q->data[b + 8] != q->sequence) {
if (!wait)
- return FALSE;
+ return false;
if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->base.client))
- return FALSE;
+ return false;
}
count[p][c] = q->data[b + q->ctr[c]];
}
}
- return TRUE;
+ return true;
}
-static INLINE boolean
+static inline bool
nve4_mp_pm_query_read_data(uint32_t count[32][4],
- struct nvc0_context *nvc0, boolean wait,
+ struct nvc0_context *nvc0, bool wait,
struct nvc0_query *q,
const struct nvc0_mp_pm_query_cfg *cfg,
unsigned mp_count)
@@ -1291,9 +1291,9 @@ nve4_mp_pm_query_read_data(uint32_t count[32][4],
for (d = 0; d < ((q->ctr[c] & ~3) ? 1 : 4); ++d) {
if (q->data[b + 20 + d] != q->sequence) {
if (!wait)
- return FALSE;
+ return false;
if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->base.client))
- return FALSE;
+ return false;
}
if (q->ctr[c] & ~0x3)
count[p][c] = q->data[b + 16 + (q->ctr[c] & 3)];
@@ -1302,7 +1302,7 @@ nve4_mp_pm_query_read_data(uint32_t count[32][4],
}
}
}
- return TRUE;
+ return true;
}
/* Metric calculations:
@@ -1325,7 +1325,7 @@ nvc0_mp_pm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
unsigned mp_count = MIN2(nvc0->screen->mp_count_compute, 32);
unsigned p, c;
const struct nvc0_mp_pm_query_cfg *cfg;
- boolean ret;
+ bool ret;
cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
@@ -1334,7 +1334,7 @@ nvc0_mp_pm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
else
ret = nvc0_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
if (!ret)
- return FALSE;
+ return false;
if (cfg->op == NVC0_COUNTER_OPn_SUM) {
for (c = 0; c < cfg->num_counters; ++c)
@@ -1394,7 +1394,7 @@ nvc0_mp_pm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
}
*(uint64_t *)result = value;
- return TRUE;
+ return true;
}
int
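The res8[8] store for PIPE_QUERY_TIMESTAMP_DISJOINT above follows from the result layout: the disjoint flag sits directly after the 64-bit frequency, i.e. at byte offset 8. A mirror struct, for illustration only:

   #include <assert.h>
   #include <stddef.h>
   #include <stdint.h>

   struct mock_timestamp_disjoint_result {
      uint64_t frequency;  /* res64[0], set to 1000000000 above */
      uint8_t  disjoint;   /* res8[8], forced to false by the driver */
   };

   int main(void)
   {
      assert(offsetof(struct mock_timestamp_disjoint_result, disjoint) == 8);
      return 0;
   }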
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 56c230e42fc..ab19b26f156 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -44,16 +44,16 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
unsigned bindings)
{
if (sample_count > 8)
- return FALSE;
+ return false;
if (!(0x117 & (1 << sample_count))) /* 0, 1, 2, 4 or 8 */
- return FALSE;
+ return false;
if (!util_format_is_supported(format, bindings))
- return FALSE;
+ return false;
if ((bindings & PIPE_BIND_SAMPLER_VIEW) && (target != PIPE_BUFFER))
if (util_format_get_blocksizebits(format) == 3 * 32)
- return FALSE;
+ return false;
/* transfers & shared are always supported */
bindings &= ~(PIPE_BIND_TRANSFER_READ |
@@ -120,6 +120,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50;
case PIPE_CAP_ENDIANNESS:
return PIPE_ENDIAN_LITTLE;
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ return 30;
/* supported caps */
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
@@ -163,7 +165,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_USER_CONSTANT_BUFFERS:
case PIPE_CAP_USER_INDEX_BUFFERS:
case PIPE_CAP_USER_VERTEX_BUFFERS:
- case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_SAMPLE_SHADING:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
@@ -174,11 +175,16 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_CLIP_HALFZ:
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
case PIPE_CAP_COMPUTE:
return (class_3d == NVE4_3D_CLASS) ? 1 : 0;
+ case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+ return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? 1 : 0;
/* unsupported caps */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
@@ -226,13 +232,14 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
switch (shader) {
case PIPE_SHADER_VERTEX:
- /*
- case PIPE_SHADER_TESSELLATION_CONTROL:
- case PIPE_SHADER_TESSELLATION_EVALUATION:
- */
case PIPE_SHADER_GEOMETRY:
case PIPE_SHADER_FRAGMENT:
break;
+ case PIPE_SHADER_TESS_CTRL:
+ case PIPE_SHADER_TESS_EVAL:
+ if (class_3d >= GM107_3D_CLASS)
+ return 0;
+ break;
case PIPE_SHADER_COMPUTE:
if (class_3d != NVE4_3D_CLASS)
return 0;
@@ -341,6 +348,7 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
enum pipe_compute_cap param, void *data)
{
uint64_t *data64 = (uint64_t *)data;
+ uint32_t *data32 = (uint32_t *)data;
const uint16_t obj_class = nvc0_screen(pscreen)->compute->oclass;
switch (param) {
@@ -372,6 +380,9 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
data64[0] = 4096;
return 8;
+ case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+ data32[0] = 32;
+ return 4;
default:
return 0;
}
@@ -550,7 +561,7 @@ nvc0_screen_init_compute(struct nvc0_screen *screen)
/* Using COMPUTE has weird effects on 3D state, we need to
* investigate this further before enabling it by default.
*/
- if (debug_get_bool_option("NVC0_COMPUTE", FALSE))
+ if (debug_get_bool_option("NVC0_COMPUTE", false))
return nvc0_screen_compute_setup(screen, screen->base.pushbuf);
return 0;
case 0xe0:
@@ -564,7 +575,7 @@ nvc0_screen_init_compute(struct nvc0_screen *screen)
}
}
-boolean
+bool
nvc0_screen_resize_tls_area(struct nvc0_screen *screen,
uint32_t lpos, uint32_t lneg, uint32_t cstack)
{
@@ -574,7 +585,7 @@ nvc0_screen_resize_tls_area(struct nvc0_screen *screen,
if (size >= (1 << 20)) {
NOUVEAU_ERR("requested TLS size too large: 0x%"PRIx64"\n", size);
- return FALSE;
+ return false;
}
size *= (screen->base.device->chipset >= 0xe0) ? 64 : 48; /* max warps */
@@ -587,11 +598,11 @@ nvc0_screen_resize_tls_area(struct nvc0_screen *screen,
NULL, &bo);
if (ret) {
NOUVEAU_ERR("failed to allocate TLS area, size: 0x%"PRIx64"\n", size);
- return FALSE;
+ return false;
}
nouveau_bo_ref(NULL, &screen->tls);
screen->tls = bo;
- return TRUE;
+ return true;
}
#define FAIL_SCREEN_INIT(str, err) \
@@ -610,6 +621,7 @@ nvc0_screen_create(struct nouveau_device *dev)
struct nouveau_pushbuf *push;
uint64_t value;
uint32_t obj_class;
+ uint32_t flags;
int ret;
unsigned i;
@@ -665,8 +677,11 @@ nvc0_screen_create(struct nouveau_device *dev)
screen->base.base.get_video_param = nouveau_vp3_screen_get_video_param;
screen->base.base.is_video_format_supported = nouveau_vp3_screen_video_supported;
- ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, NULL,
- &screen->fence.bo);
+ flags = NOUVEAU_BO_GART | NOUVEAU_BO_MAP;
+ if (dev->drm_version >= 0x01000202)
+ flags |= NOUVEAU_BO_COHERENT;
+
+ ret = nouveau_bo_new(dev, flags, 0, 4096, NULL, &screen->fence.bo);
if (ret)
goto fail;
nouveau_bo_map(screen->fence.bo, 0, NULL);
@@ -781,7 +796,7 @@ nvc0_screen_create(struct nouveau_device *dev)
BEGIN_NVC0(push, NVC0_3D(COND_MODE), 1);
PUSH_DATA (push, NVC0_3D_COND_MODE_ALWAYS);
- if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", TRUE)) {
+ if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", true)) {
/* kill shaders after about 1 second (at 100 MHz) */
BEGIN_NVC0(push, NVC0_3D(WATCHDOG_TIMER), 1);
PUSH_DATA (push, 0x17);
@@ -1012,6 +1027,7 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATA (push, 0x20);
BEGIN_NVC0(push, NVC0_3D(SP_SELECT(0)), 1);
PUSH_DATA (push, 0x00);
+ screen->save_state.patch_vertices = 3;
BEGIN_NVC0(push, NVC0_3D(POINT_COORD_REPLACE), 1);
PUSH_DATA (push, 0);
@@ -1031,7 +1047,7 @@ nvc0_screen_create(struct nouveau_device *dev)
if (!nvc0_blitter_create(screen))
goto fail;
- nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE);
+ nouveau_fence_new(&screen->base, &screen->base.fence.current, false);
return pscreen;
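The sample-count filter near the top of this file is a bit trick: 0x117 has bits 0, 1, 2, 4 and 8 set, so `0x117 & (1 << n)` accepts exactly the counts the comment names. Spelled out:

   #include <assert.h>

   int main(void)
   {
      /* 0x117 = 1 0001 0111b: bits 0, 1, 2, 4 and 8 */
      assert((0x117 & (1u << 0)) && (0x117 & (1u << 1)));
      assert((0x117 & (1u << 2)) && (0x117 & (1u << 4)));
      assert( 0x117 & (1u << 8));
      assert(!(0x117 & (1u << 3)) && !(0x117 & (1u << 6)));  /* rejected */
      return 0;
   }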
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
index ef2bd43f006..d8826ae0c0d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
@@ -28,16 +28,17 @@ struct nvc0_context;
struct nvc0_blitter;
struct nvc0_graph_state {
- boolean flushed;
- boolean rasterizer_discard;
- boolean early_z_forced;
- boolean prim_restart;
+ bool flushed;
+ bool rasterizer_discard;
+ bool early_z_forced;
+ bool prim_restart;
uint32_t instance_elts; /* bitmask of per-instance elements */
uint32_t instance_base;
uint32_t constant_vbos;
uint32_t constant_elts;
int32_t index_bias;
uint16_t scissor;
+ uint8_t patch_vertices;
uint8_t vbo_mode; /* 0 = normal, 1 = translate, 3 = translate, forced */
uint8_t num_vtxbufs;
uint8_t num_vtxelts;
@@ -95,7 +96,7 @@ struct nvc0_screen {
struct nvc0_program *prog; /* compute state object to read MP counters */
struct pipe_query *mp_counter[8]; /* counter to query allocation */
uint8_t num_mp_pm_active[2];
- boolean mp_counters_enabled;
+ bool mp_counters_enabled;
} pm;
struct nouveau_object *eng3d; /* sqrt(1/2)|kepler> + sqrt(1/2)|fermi> */
@@ -105,7 +106,7 @@ struct nvc0_screen {
struct nouveau_object *nvsw;
};
-static INLINE struct nvc0_screen *
+static inline struct nvc0_screen *
nvc0_screen(struct pipe_screen *screen)
{
return (struct nvc0_screen *)screen;
@@ -276,7 +277,7 @@ int nvc0_screen_get_driver_query_info(struct pipe_screen *, unsigned,
int nvc0_screen_get_driver_query_group_info(struct pipe_screen *, unsigned,
struct pipe_driver_query_group_info *);
-boolean nvc0_blitter_create(struct nvc0_screen *);
+bool nvc0_blitter_create(struct nvc0_screen *);
void nvc0_blitter_destroy(struct nvc0_screen *);
void nvc0_screen_make_buffers_resident(struct nvc0_screen *);
@@ -287,10 +288,10 @@ int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *);
int nve4_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *);
int nvc0_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *);
-boolean nvc0_screen_resize_tls_area(struct nvc0_screen *, uint32_t lpos,
- uint32_t lneg, uint32_t cstack);
+bool nvc0_screen_resize_tls_area(struct nvc0_screen *, uint32_t lpos,
+ uint32_t lneg, uint32_t cstack);
-static INLINE void
+static inline void
nvc0_resource_fence(struct nv04_resource *res, uint32_t flags)
{
struct nvc0_screen *screen = nvc0_screen(res->base.screen);
@@ -302,7 +303,7 @@ nvc0_resource_fence(struct nv04_resource *res, uint32_t flags)
}
}
-static INLINE void
+static inline void
nvc0_resource_validate(struct nv04_resource *res, uint32_t flags)
{
if (likely(res->bo)) {
@@ -325,21 +326,21 @@ struct nvc0_format {
extern const struct nvc0_format nvc0_format_table[];
-static INLINE void
+static inline void
nvc0_screen_tic_unlock(struct nvc0_screen *screen, struct nv50_tic_entry *tic)
{
if (tic->id >= 0)
screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32));
}
-static INLINE void
+static inline void
nvc0_screen_tsc_unlock(struct nvc0_screen *screen, struct nv50_tsc_entry *tsc)
{
if (tsc->id >= 0)
screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32));
}
-static INLINE void
+static inline void
nvc0_screen_tic_free(struct nvc0_screen *screen, struct nv50_tic_entry *tic)
{
if (tic->id >= 0) {
@@ -348,7 +349,7 @@ nvc0_screen_tic_free(struct nvc0_screen *screen, struct nv50_tic_entry *tic)
}
}
-static INLINE void
+static inline void
nvc0_screen_tsc_free(struct nvc0_screen *screen, struct nv50_tsc_entry *tsc)
{
if (tsc->id >= 0) {
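The tic/tsc lock words above are plain uint32_t bitmaps, one bit per table entry: id/32 selects the word, id%32 the bit. A stand-alone illustration of the unlock step:

   #include <assert.h>
   #include <stdint.h>

   int main(void)
   {
      uint32_t lock[2] = { 0, 1u << 5 };   /* entry 37 lives in word 1, bit 5 */
      int id = 37;
      lock[id / 32] &= ~(1u << (id % 32));
      assert(lock[1] == 0);
      return 0;
   }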
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
index e0842784a88..8aa127adc0a 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
@@ -27,7 +27,7 @@
#include "nvc0/nvc0_context.h"
-static INLINE void
+static inline void
nvc0_program_update_context_state(struct nvc0_context *nvc0,
struct nvc0_program *prog, int stage)
{
@@ -63,22 +63,22 @@ nvc0_program_update_context_state(struct nvc0_context *nvc0,
}
}
-static INLINE boolean
+static inline bool
nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
{
if (prog->mem)
- return TRUE;
+ return true;
if (!prog->translated) {
prog->translated = nvc0_program_translate(
prog, nvc0->screen->base.device->chipset);
if (!prog->translated)
- return FALSE;
+ return false;
}
if (likely(prog->code_size))
return nvc0_program_upload_code(nvc0, prog);
- return TRUE; /* stream output info only */
+ return true; /* stream output info only */
}
void
@@ -147,9 +147,6 @@ nvc0_tctlprog_validate(struct nvc0_context *nvc0)
PUSH_DATA (push, tp->code_base);
BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(2)), 1);
PUSH_DATA (push, tp->num_gprs);
-
- if (tp->tp.input_patch_size <= 32)
- IMMED_NVC0(push, NVC0_3D(PATCH_VERTICES), tp->tp.input_patch_size);
} else {
BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1);
PUSH_DATA (push, 0x20);
@@ -192,7 +189,7 @@ nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
/* we allow GPs with no code for specifying stream output state only */
if (gp && gp->code_size) {
- const boolean gp_selects_layer = !!(gp->hdr[13] & (1 << 9));
+ const bool gp_selects_layer = !!(gp->hdr[13] & (1 << 9));
BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1);
PUSH_DATA (push, 0x41);
@@ -280,7 +277,7 @@ nvc0_tfb_validate(struct nvc0_context *nvc0)
nvc0_query_pushbuf_submit(push, targ->pq, 0x4);
} else {
PUSH_DATA(push, 0); /* TFB_BUFFER_OFFSET */
- targ->clean = FALSE;
+ targ->clean = false;
}
}
for (; b < 4; ++b)
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
index 6b7a211e71b..2a33857d9df 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -35,7 +35,7 @@
#include "nouveau_gldefs.h"
-static INLINE uint32_t
+static inline uint32_t
nvc0_colormask(unsigned mask)
{
uint32_t ret = 0;
@@ -55,7 +55,7 @@ nvc0_colormask(unsigned mask)
#define NVC0_BLEND_FACTOR_CASE(a, b) \
case PIPE_BLENDFACTOR_##a: return NV50_BLEND_FACTOR_##b
-static INLINE uint32_t
+static inline uint32_t
nvc0_blend_fac(unsigned factor)
{
switch (factor) {
@@ -92,8 +92,8 @@ nvc0_blend_state_create(struct pipe_context *pipe,
int r; /* reference */
uint32_t ms;
uint8_t blend_en = 0;
- boolean indep_masks = FALSE;
- boolean indep_funcs = FALSE;
+ bool indep_masks = false;
+ bool indep_funcs = false;
so->pipe = *cso;
@@ -111,7 +111,7 @@ nvc0_blend_state_create(struct pipe_context *pipe,
cso->rt[i].alpha_func != cso->rt[r].alpha_func ||
cso->rt[i].alpha_src_factor != cso->rt[r].alpha_src_factor ||
cso->rt[i].alpha_dst_factor != cso->rt[r].alpha_dst_factor) {
- indep_funcs = TRUE;
+ indep_funcs = true;
break;
}
}
@@ -120,7 +120,7 @@ nvc0_blend_state_create(struct pipe_context *pipe,
for (i = 1; i < 8; ++i) {
if (cso->rt[i].colormask != cso->rt[0].colormask) {
- indep_masks = TRUE;
+ indep_masks = true;
break;
}
}
@@ -351,6 +351,13 @@ nvc0_zsa_state_create(struct pipe_context *pipe,
SB_DATA (so, nvgl_comparison_op(cso->depth.func));
}
+ SB_IMMED_3D(so, DEPTH_BOUNDS_EN, cso->depth.bounds_test);
+ if (cso->depth.bounds_test) {
+ SB_BEGIN_3D(so, DEPTH_BOUNDS(0), 2);
+ SB_DATA (so, fui(cso->depth.bounds_min));
+ SB_DATA (so, fui(cso->depth.bounds_max));
+ }
+
if (cso->stencil[0].enabled) {
SB_BEGIN_3D(so, STENCIL_ENABLE, 5);
SB_DATA (so, 1);
@@ -428,7 +435,7 @@ nvc0_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
FREE(hwcso);
}
-static INLINE void
+static inline void
nvc0_stage_sampler_states_bind(struct nvc0_context *nvc0, int s,
unsigned nr, void **hwcso)
{
@@ -508,6 +515,14 @@ nvc0_bind_sampler_states(struct pipe_context *pipe, unsigned shader,
assert(start == 0);
nvc0_stage_sampler_states_bind(nvc0_context(pipe), 0, nr, s);
break;
+ case PIPE_SHADER_TESS_CTRL:
+ assert(start == 0);
+ nvc0_stage_sampler_states_bind(nvc0_context(pipe), 1, nr, s);
+ break;
+ case PIPE_SHADER_TESS_EVAL:
+ assert(start == 0);
+ nvc0_stage_sampler_states_bind(nvc0_context(pipe), 2, nr, s);
+ break;
case PIPE_SHADER_GEOMETRY:
assert(start == 0);
nvc0_stage_sampler_states_bind(nvc0_context(pipe), 3, nr, s);
@@ -537,7 +552,7 @@ nvc0_sampler_view_destroy(struct pipe_context *pipe,
FREE(nv50_tic_entry(view));
}
-static INLINE void
+static inline void
nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s,
unsigned nr,
struct pipe_sampler_view **views)
@@ -633,6 +648,12 @@ nvc0_set_sampler_views(struct pipe_context *pipe, unsigned shader,
case PIPE_SHADER_VERTEX:
nvc0_stage_set_sampler_views(nvc0_context(pipe), 0, nr, views);
break;
+ case PIPE_SHADER_TESS_CTRL:
+ nvc0_stage_set_sampler_views(nvc0_context(pipe), 1, nr, views);
+ break;
+ case PIPE_SHADER_TESS_EVAL:
+ nvc0_stage_set_sampler_views(nvc0_context(pipe), 2, nr, views);
+ break;
case PIPE_SHADER_GEOMETRY:
nvc0_stage_set_sampler_views(nvc0_context(pipe), 3, nr, views);
break;
@@ -734,6 +755,38 @@ nvc0_gp_state_bind(struct pipe_context *pipe, void *hwcso)
}
static void *
+nvc0_tcp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_TESS_CTRL);
+}
+
+static void
+nvc0_tcp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->tctlprog = hwcso;
+ nvc0->dirty |= NVC0_NEW_TCTLPROG;
+}
+
+static void *
+nvc0_tep_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_TESS_EVAL);
+}
+
+static void
+nvc0_tep_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->tevlprog = hwcso;
+ nvc0->dirty |= NVC0_NEW_TEVLPROG;
+}
+
+static void *
nvc0_cp_state_create(struct pipe_context *pipe,
const struct pipe_compute_state *cso)
{
@@ -790,7 +843,7 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
pipe_resource_reference(&nvc0->constbuf[s][i].u.buf, res);
- nvc0->constbuf[s][i].user = (cb && cb->user_buffer) ? TRUE : FALSE;
+ nvc0->constbuf[s][i].user = (cb && cb->user_buffer) ? true : false;
if (nvc0->constbuf[s][i].user) {
nvc0->constbuf[s][i].u.data = cb->user_buffer;
nvc0->constbuf[s][i].size = MIN2(cb->buffer_size, 0x10000);
@@ -934,6 +987,18 @@ nvc0_set_viewport_states(struct pipe_context *pipe,
}
static void
+nvc0_set_tess_state(struct pipe_context *pipe,
+ const float default_tess_outer[4],
+ const float default_tess_inner[2])
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ memcpy(nvc0->default_tess_outer, default_tess_outer, 4 * sizeof(float));
+ memcpy(nvc0->default_tess_inner, default_tess_inner, 2 * sizeof(float));
+ nvc0->dirty |= NVC0_NEW_TESSFACTOR;
+}
+
+static void
nvc0_set_vertex_buffers(struct pipe_context *pipe,
unsigned start_slot, unsigned count,
const struct pipe_vertex_buffer *vb)
@@ -1018,7 +1083,7 @@ nvc0_so_target_create(struct pipe_context *pipe,
FREE(targ);
return NULL;
}
- targ->clean = TRUE;
+ targ->clean = true;
targ->pipe.buffer_size = size;
targ->pipe.buffer_offset = offset;
@@ -1051,13 +1116,13 @@ nvc0_set_transform_feedback_targets(struct pipe_context *pipe,
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
unsigned i;
- boolean serialize = TRUE;
+ bool serialize = true;
assert(num_targets <= 4);
for (i = 0; i < num_targets; ++i) {
- const boolean changed = nvc0->tfbbuf[i] != targets[i];
- const boolean append = (offsets[i] == ((unsigned)-1));
+ const bool changed = nvc0->tfbbuf[i] != targets[i];
+ const bool append = (offsets[i] == ((unsigned)-1));
if (!changed && append)
continue;
nvc0->tfbbuf_dirty |= 1 << i;
@@ -1066,7 +1131,7 @@ nvc0_set_transform_feedback_targets(struct pipe_context *pipe,
nvc0_so_target_save_offset(pipe, nvc0->tfbbuf[i], i, &serialize);
if (targets[i] && !append)
- nvc0_so_target(targets[i])->clean = TRUE;
+ nvc0_so_target(targets[i])->clean = true;
pipe_so_target_reference(&nvc0->tfbbuf[i], targets[i]);
}
@@ -1125,16 +1190,18 @@ nvc0_set_compute_resources(struct pipe_context *pipe,
}
static void
-nvc0_set_shader_resources(struct pipe_context *pipe,
- unsigned start, unsigned nr,
- struct pipe_surface **resources)
+nvc0_set_shader_images(struct pipe_context *pipe, unsigned shader,
+ unsigned start_slot, unsigned count,
+ struct pipe_image_view **views)
{
- nvc0_bind_surfaces_range(nvc0_context(pipe), 0, start, nr, resources);
+#if 0
+ nvc0_bind_surfaces_range(nvc0_context(pipe), 0, start_slot, count, views);
nvc0_context(pipe)->dirty |= NVC0_NEW_SURFACES;
+#endif
}
-static INLINE void
+static inline void
nvc0_set_global_handle(uint32_t *phandle, struct pipe_resource *res)
{
struct nv04_resource *buf = nv04_resource(res);
@@ -1218,12 +1285,18 @@ nvc0_init_state_functions(struct nvc0_context *nvc0)
pipe->create_vs_state = nvc0_vp_state_create;
pipe->create_fs_state = nvc0_fp_state_create;
pipe->create_gs_state = nvc0_gp_state_create;
+ pipe->create_tcs_state = nvc0_tcp_state_create;
+ pipe->create_tes_state = nvc0_tep_state_create;
pipe->bind_vs_state = nvc0_vp_state_bind;
pipe->bind_fs_state = nvc0_fp_state_bind;
pipe->bind_gs_state = nvc0_gp_state_bind;
+ pipe->bind_tcs_state = nvc0_tcp_state_bind;
+ pipe->bind_tes_state = nvc0_tep_state_bind;
pipe->delete_vs_state = nvc0_sp_state_delete;
pipe->delete_fs_state = nvc0_sp_state_delete;
pipe->delete_gs_state = nvc0_sp_state_delete;
+ pipe->delete_tcs_state = nvc0_sp_state_delete;
+ pipe->delete_tes_state = nvc0_sp_state_delete;
pipe->create_compute_state = nvc0_cp_state_create;
pipe->bind_compute_state = nvc0_cp_state_bind;
@@ -1239,6 +1312,7 @@ nvc0_init_state_functions(struct nvc0_context *nvc0)
pipe->set_polygon_stipple = nvc0_set_polygon_stipple;
pipe->set_scissor_states = nvc0_set_scissor_states;
pipe->set_viewport_states = nvc0_set_viewport_states;
+ pipe->set_tess_state = nvc0_set_tess_state;
pipe->create_vertex_elements_state = nvc0_vertex_state_create;
pipe->delete_vertex_elements_state = nvc0_vertex_state_delete;
@@ -1253,8 +1327,14 @@ nvc0_init_state_functions(struct nvc0_context *nvc0)
pipe->set_global_binding = nvc0_set_global_bindings;
pipe->set_compute_resources = nvc0_set_compute_resources;
- pipe->set_shader_resources = nvc0_set_shader_resources;
+ pipe->set_shader_images = nvc0_set_shader_images;
nvc0->sample_mask = ~0;
nvc0->min_samples = 1;
+ nvc0->default_tess_outer[0] =
+ nvc0->default_tess_outer[1] =
+ nvc0->default_tess_outer[2] =
+ nvc0->default_tess_outer[3] = 1.0;
+ nvc0->default_tess_inner[0] =
+ nvc0->default_tess_inner[1] = 1.0;
}
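The new set_tess_state hook only records the default levels and flags NVC0_NEW_TESSFACTOR; nvc0_validate_tess_state (next file) pushes them as six floats starting at TESS_LEVEL_OUTER(0). A sketch of the caller side, assuming gallium's set_tess_state signature as declared in this change:

   #include "pipe/p_context.h"

   /* Illustration only, not part of this change. */
   static void install_default_tess_levels(struct pipe_context *pipe)
   {
      static const float outer[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
      static const float inner[2] = { 1.0f, 1.0f };
      pipe->set_tess_state(pipe, outer, inner);
   }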
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index c52399ab312..ce1119c284d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -55,7 +55,7 @@ nvc0_validate_zcull(struct nvc0_context *nvc0)
}
#endif
-static INLINE void
+static inline void
nvc0_fb_set_null_rt(struct nouveau_pushbuf *push, unsigned i)
{
BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(i)), 6);
@@ -74,7 +74,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
unsigned i, ms;
unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1;
- boolean serialize = FALSE;
+ bool serialize = false;
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB);
@@ -136,7 +136,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
}
if (res->status & NOUVEAU_BUFFER_STATUS_GPU_READING)
- serialize = TRUE;
+ serialize = true;
res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
@@ -168,7 +168,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
ms_mode = mt->ms_mode;
if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING)
- serialize = TRUE;
+ serialize = true;
mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
@@ -309,7 +309,7 @@ nvc0_validate_viewport(struct nvc0_context *nvc0)
nvc0->viewports_dirty = 0;
}
-static INLINE void
+static inline void
nvc0_upload_uclip_planes(struct nvc0_context *nvc0, unsigned s)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
@@ -324,7 +324,7 @@ nvc0_upload_uclip_planes(struct nvc0_context *nvc0, unsigned s)
PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4);
}
-static INLINE void
+static inline void
nvc0_check_program_ucps(struct nvc0_context *nvc0,
struct nvc0_program *vp, uint8_t mask)
{
@@ -339,7 +339,7 @@ nvc0_check_program_ucps(struct nvc0_context *nvc0,
nvc0_vertprog_validate(nvc0);
else
if (likely(vp == nvc0->gmtyprog))
- nvc0_vertprog_validate(nvc0);
+ nvc0_gmtyprog_validate(nvc0);
else
nvc0_tevlprog_validate(nvc0);
}
@@ -455,6 +455,8 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
PUSH_DATA (push, (i << 4) | 1);
BCTX_REFN(nvc0->bufctx_3d, CB(s, i), res, RD);
+
+ nvc0->cb_dirty = 1; /* Force cache flush for UBO. */
} else {
BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
PUSH_DATA (push, (i << 4) | 0);
@@ -518,12 +520,12 @@ static void
nvc0_validate_derived_1(struct nvc0_context *nvc0)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
- boolean rasterizer_discard;
+ bool rasterizer_discard;
if (nvc0->rast && nvc0->rast->pipe.rasterizer_discard) {
- rasterizer_discard = TRUE;
+ rasterizer_discard = true;
} else {
- boolean zs = nvc0->zsa &&
+ bool zs = nvc0->zsa &&
(nvc0->zsa->pipe.depth.enabled || nvc0->zsa->pipe.stencil[0].enabled);
rasterizer_discard = !zs &&
(!nvc0->fragprog || !nvc0->fragprog->hdr[18]);
@@ -535,6 +537,33 @@ nvc0_validate_derived_1(struct nvc0_context *nvc0)
}
}
+/* alpha test is disabled if there are no color RTs, so make sure we have at
+ * least one if alpha test is enabled. Note that this must run after
+ * nvc0_validate_fb, otherwise that will override the RT count setting.
+ */
+static void
+nvc0_validate_derived_2(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+
+ if (nvc0->zsa && nvc0->zsa->pipe.alpha.enabled &&
+ nvc0->framebuffer.nr_cbufs == 0) {
+ nvc0_fb_set_null_rt(push, 0);
+ BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
+ PUSH_DATA (push, (076543210 << 4) | 1);
+ }
+}
+
+static void
+nvc0_validate_tess_state(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+
+ BEGIN_NVC0(push, NVC0_3D(TESS_LEVEL_OUTER(0)), 6);
+ PUSH_DATAp(push, nvc0->default_tess_outer, 4);
+ PUSH_DATAp(push, nvc0->default_tess_inner, 2);
+}
+
static void
nvc0_switch_pipe_context(struct nvc0_context *ctx_to)
{
@@ -593,10 +622,12 @@ static struct state_validate {
{ nvc0_vertprog_validate, NVC0_NEW_VERTPROG },
{ nvc0_tctlprog_validate, NVC0_NEW_TCTLPROG },
{ nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG },
+ { nvc0_validate_tess_state, NVC0_NEW_TESSFACTOR },
{ nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG },
{ nvc0_fragprog_validate, NVC0_NEW_FRAGPROG },
{ nvc0_validate_derived_1, NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA |
NVC0_NEW_RASTERIZER },
+ { nvc0_validate_derived_2, NVC0_NEW_ZSA | NVC0_NEW_FRAMEBUFFER },
{ nvc0_validate_clip, NVC0_NEW_CLIP | NVC0_NEW_RASTERIZER |
NVC0_NEW_VERTPROG |
NVC0_NEW_TEVLPROG |
@@ -613,7 +644,7 @@ static struct state_validate {
};
#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
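
Each entry in this table pairs a validate callback with the dirty bits that should trigger it; nvc0_state_validate walks the list once, fires every entry whose mask intersects the context's dirty bits, then clears those bits. A minimal standalone model of that dispatch (names illustrative, not the driver's):

#include <stdint.h>
#include <stdio.h>

struct validate_entry {
    void (*func)(void);
    uint32_t states;  /* dirty bits that make this entry run */
};

static void validate_fb(void)    { puts("validate fb"); }
static void validate_blend(void) { puts("validate blend"); }

static const struct validate_entry list[] = {
    { validate_fb,    1u << 0 },
    { validate_blend, 1u << 1 },
};

static void state_validate(uint32_t *dirty, uint32_t mask)
{
    uint32_t state_mask = *dirty & mask;

    for (unsigned i = 0; i < sizeof(list) / sizeof(list[0]); i++)
        if (list[i].states & state_mask)
            list[i].func();
    *dirty &= ~state_mask;  /* handled bits are no longer dirty */
}

int main(void)
{
    uint32_t dirty = (1u << 0) | (1u << 1);
    state_validate(&dirty, ~0u);  /* runs both entries, clears both bits */
    return 0;
}
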
-boolean
+bool
nvc0_state_validate(struct nvc0_context *nvc0, uint32_t mask, unsigned words)
{
uint32_t state_mask;
@@ -634,15 +665,15 @@ nvc0_state_validate(struct nvc0_context *nvc0, uint32_t mask, unsigned words)
}
nvc0->dirty &= ~state_mask;
- nvc0_bufctx_fence(nvc0, nvc0->bufctx_3d, FALSE);
+ nvc0_bufctx_fence(nvc0, nvc0->bufctx_3d, false);
}
nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_3d);
ret = nouveau_pushbuf_validate(nvc0->base.pushbuf);
if (unlikely(nvc0->state.flushed)) {
- nvc0->state.flushed = FALSE;
- nvc0_bufctx_fence(nvc0, nvc0->bufctx_3d, TRUE);
+ nvc0->state.flushed = false;
+ nvc0_bufctx_fence(nvc0, nvc0->bufctx_3d, true);
}
return !ret;
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h
index 1d70b7c7b23..18fcc12dea3 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h
@@ -29,7 +29,7 @@ struct nvc0_rasterizer_stateobj {
struct nvc0_zsa_stateobj {
struct pipe_depth_stencil_alpha_state pipe;
int size;
- uint32_t state[26];
+ uint32_t state[30];
};
struct nvc0_constbuf {
@@ -39,7 +39,7 @@ struct nvc0_constbuf {
} u;
uint32_t size;
uint32_t offset;
- boolean user; /* should only be TRUE if u.data is valid and non-NULL */
+ bool user; /* should only be true if u.data is valid and non-NULL */
};
struct nvc0_vertex_element {
@@ -55,8 +55,8 @@ struct nvc0_vertex_stateobj {
unsigned num_elements;
uint32_t instance_elts;
uint32_t instance_bufs;
- boolean shared_slots;
- boolean need_conversion; /* e.g. VFETCH cannot convert f64 to f32 */
+ bool shared_slots;
+ bool need_conversion; /* e.g. VFETCH cannot convert f64 to f32 */
unsigned size; /* size of vertex in bytes (when packed) */
struct nvc0_vertex_element element[0];
};
@@ -65,10 +65,10 @@ struct nvc0_so_target {
struct pipe_stream_output_target pipe;
struct pipe_query *pq;
unsigned stride;
- boolean clean;
+ bool clean;
};
-static INLINE struct nvc0_so_target *
+static inline struct nvc0_so_target *
nvc0_so_target(struct pipe_stream_output_target *ptarg)
{
return (struct nvc0_so_target *)ptarg;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index a820de7259a..51a6f93f891 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -47,8 +47,8 @@
#define NOUVEAU_DRIVER 0xc0
#include "nv50/nv50_blit.h"
-static INLINE uint8_t
-nvc0_2d_format(enum pipe_format format, boolean dst, boolean dst_src_equal)
+static inline uint8_t
+nvc0_2d_format(enum pipe_format format, bool dst, bool dst_src_equal)
{
uint8_t id = nvc0_format_table[format].rt;
@@ -81,9 +81,9 @@ nvc0_2d_format(enum pipe_format format, boolean dst, boolean dst_src_equal)
}
static int
-nvc0_2d_texture_set(struct nouveau_pushbuf *push, boolean dst,
+nvc0_2d_texture_set(struct nouveau_pushbuf *push, bool dst,
struct nv50_miptree *mt, unsigned level, unsigned layer,
- enum pipe_format pformat, boolean dst_src_pformat_equal)
+ enum pipe_format pformat, bool dst_src_pformat_equal)
{
struct nouveau_bo *bo = mt->base.bo;
uint32_t width, height, depth;
@@ -161,16 +161,16 @@ nvc0_2d_texture_do_copy(struct nouveau_pushbuf *push,
const enum pipe_format dfmt = dst->base.base.format;
const enum pipe_format sfmt = src->base.base.format;
int ret;
- boolean eqfmt = dfmt == sfmt;
+ bool eqfmt = dfmt == sfmt;
if (!PUSH_SPACE(push, 2 * 16 + 32))
return PIPE_ERROR;
- ret = nvc0_2d_texture_set(push, TRUE, dst, dst_level, dz, dfmt, eqfmt);
+ ret = nvc0_2d_texture_set(push, true, dst, dst_level, dz, dfmt, eqfmt);
if (ret)
return ret;
- ret = nvc0_2d_texture_set(push, FALSE, src, src_level, sz, sfmt, eqfmt);
+ ret = nvc0_2d_texture_set(push, false, src, src_level, sz, sfmt, eqfmt);
if (ret)
return ret;
@@ -189,7 +189,7 @@ nvc0_2d_texture_do_copy(struct nouveau_pushbuf *push,
PUSH_DATA (push, 0);
PUSH_DATA (push, sx << src->ms_x);
PUSH_DATA (push, 0);
- PUSH_DATA (push, sy << src->ms_x);
+ PUSH_DATA (push, sy << src->ms_y);
return 0;
}
@@ -203,7 +203,7 @@ nvc0_resource_copy_region(struct pipe_context *pipe,
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
int ret;
- boolean m2mf;
+ bool m2mf;
unsigned dst_layer = dstz, src_layer = src_box->z;
if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
@@ -704,7 +704,7 @@ nvc0_blitter_make_vp(struct nvc0_blitter *blit)
};
blit->vp.type = PIPE_SHADER_VERTEX;
- blit->vp.translated = TRUE;
+ blit->vp.translated = true;
if (blit->screen->base.class_3d >= GM107_3D_CLASS) {
blit->vp.code = (uint32_t *)code_gm107; /* const_cast */
blit->vp.code_size = sizeof(code_gm107);
@@ -1217,7 +1217,7 @@ nvc0_blit_eng2d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
int i;
uint32_t mode;
uint32_t mask = nv50_blit_eng2d_get_mask(info);
- boolean b;
+ bool b;
mode = nv50_blit_get_filter(info) ?
NV50_2D_BLIT_CONTROL_FILTER_BILINEAR :
@@ -1376,39 +1376,40 @@ static void
nvc0_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
- boolean eng3d = FALSE;
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ bool eng3d = false;
if (util_format_is_depth_or_stencil(info->dst.resource->format)) {
if (!(info->mask & PIPE_MASK_ZS))
return;
if (info->dst.resource->format == PIPE_FORMAT_Z32_FLOAT ||
info->dst.resource->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
- eng3d = TRUE;
+ eng3d = true;
if (info->filter != PIPE_TEX_FILTER_NEAREST)
- eng3d = TRUE;
+ eng3d = true;
} else {
if (!(info->mask & PIPE_MASK_RGBA))
return;
if (info->mask != PIPE_MASK_RGBA)
- eng3d = TRUE;
+ eng3d = true;
}
if (nv50_miptree(info->src.resource)->layout_3d) {
- eng3d = TRUE;
+ eng3d = true;
} else
if (info->src.box.depth != info->dst.box.depth) {
- eng3d = TRUE;
+ eng3d = true;
debug_printf("blit: cannot filter array or cube textures in z direction");
}
if (!eng3d && info->dst.format != info->src.format) {
if (!nv50_2d_dst_format_faithful(info->dst.format)) {
- eng3d = TRUE;
+ eng3d = true;
} else
if (!nv50_2d_src_format_faithful(info->src.format)) {
if (!util_format_is_luminance(info->src.format)) {
if (!nv50_2d_dst_format_ops_supported(info->dst.format))
- eng3d = TRUE;
+ eng3d = true;
else
if (util_format_is_intensity(info->src.format))
eng3d = info->src.format != PIPE_FORMAT_I8_UNORM;
@@ -1420,30 +1421,36 @@ nvc0_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
}
} else
if (util_format_is_luminance_alpha(info->src.format))
- eng3d = TRUE;
+ eng3d = true;
}
if (info->src.resource->nr_samples == 8 &&
info->dst.resource->nr_samples <= 1)
- eng3d = TRUE;
+ eng3d = true;
#if 0
/* FIXME: can't make this work with eng2d anymore, at least not on nv50 */
if (info->src.resource->nr_samples > 1 ||
info->dst.resource->nr_samples > 1)
- eng3d = TRUE;
+ eng3d = true;
#endif
/* FIXME: find correct src coordinates adjustments */
if ((info->src.box.width != info->dst.box.width &&
info->src.box.width != -info->dst.box.width) ||
(info->src.box.height != info->dst.box.height &&
info->src.box.height != -info->dst.box.height))
- eng3d = TRUE;
+ eng3d = true;
+
+ if (nvc0->screen->num_occlusion_queries_active)
+ IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 0);
if (!eng3d)
nvc0_blit_eng2d(nvc0, info);
else
nvc0_blit_3d(nvc0, info);
+ if (nvc0->screen->num_occlusion_queries_active)
+ IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 1);
+
NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_blit_count, 1);
}
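
The new SAMPLECNT_ENABLE toggles bracket the blit because blits are internal draws: with sample counting left on, they would inflate any occlusion query that happens to be active across them. The bracket pattern in isolation, as a sketch with stand-in helpers:

#include <stdio.h>

static int queries_active;

static void samplecnt_enable(int on)
{
    printf("SAMPLECNT_ENABLE = %d\n", on);
}

static void blit(void)
{
    if (queries_active)
        samplecnt_enable(0);  /* pause counting around the internal draw */

    printf("...blit draw...\n");

    if (queries_active)
        samplecnt_enable(1);  /* resume for the application's draws */
}

int main(void)
{
    queries_active = 1;
    blit();
    return 0;
}
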
@@ -1453,13 +1460,13 @@ nvc0_flush_resource(struct pipe_context *ctx,
{
}
-boolean
+bool
nvc0_blitter_create(struct nvc0_screen *screen)
{
screen->blitter = CALLOC_STRUCT(nvc0_blitter);
if (!screen->blitter) {
NOUVEAU_ERR("failed to allocate blitter struct\n");
- return FALSE;
+ return false;
}
screen->blitter->screen = screen;
@@ -1468,7 +1475,7 @@ nvc0_blitter_create(struct nvc0_screen *screen)
nvc0_blitter_make_vp(screen->blitter);
nvc0_blitter_make_sampler(screen->blitter);
- return TRUE;
+ return true;
}
void
@@ -1491,20 +1498,20 @@ nvc0_blitter_destroy(struct nvc0_screen *screen)
FREE(blitter);
}
-boolean
+bool
nvc0_blitctx_create(struct nvc0_context *nvc0)
{
nvc0->blit = CALLOC_STRUCT(nvc0_blitctx);
if (!nvc0->blit) {
NOUVEAU_ERR("failed to allocate blit context\n");
- return FALSE;
+ return false;
}
nvc0->blit->nvc0 = nvc0;
nvc0->blit->rast.pipe.half_pixel_center = 1;
- return TRUE;
+ return true;
}
void
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
index ddc0409ca86..d19082e0e15 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
@@ -34,8 +34,8 @@
(NV50_TIC_0_MAPA__MASK | NV50_TIC_0_MAPB__MASK | \
NV50_TIC_0_MAPG__MASK | NV50_TIC_0_MAPR__MASK)
-static INLINE uint32_t
-nv50_tic_swizzle(uint32_t tc, unsigned swz, boolean tex_int)
+static inline uint32_t
+nv50_tic_swizzle(uint32_t tc, unsigned swz, bool tex_int)
{
switch (swz) {
case PIPE_SWIZZLE_RED:
@@ -82,7 +82,7 @@ nvc0_create_texture_view(struct pipe_context *pipe,
uint32_t depth;
struct nv50_tic_entry *view;
struct nv50_miptree *mt;
- boolean tex_int;
+ bool tex_int;
view = MALLOC_STRUCT(nv50_tic_entry);
if (!view)
@@ -195,7 +195,7 @@ nvc0_create_texture_view(struct pipe_context *pipe,
default:
NOUVEAU_ERR("unexpected/invalid texture target: %d\n",
mt->base.base.target);
- return FALSE;
+ return false;
}
tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000;
@@ -226,7 +226,7 @@ nvc0_create_texture_view(struct pipe_context *pipe,
return &view->pipe;
}
-static boolean
+static bool
nvc0_validate_tic(struct nvc0_context *nvc0, int s)
{
uint32_t commands[32];
@@ -234,12 +234,12 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s)
struct nouveau_bo *txc = nvc0->screen->txc;
unsigned i;
unsigned n = 0;
- boolean need_flush = FALSE;
+ bool need_flush = false;
for (i = 0; i < nvc0->num_textures[s]; ++i) {
struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
struct nv04_resource *res;
- const boolean dirty = !!(nvc0->textures_dirty[s] & (1 << i));
+ const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));
if (!tic) {
if (dirty)
@@ -263,7 +263,7 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s)
BEGIN_NIC0(push, NVC0_M2MF(DATA), 8);
PUSH_DATAp(push, &tic->tic[0], 8);
- need_flush = TRUE;
+ need_flush = true;
} else
if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
@@ -295,18 +295,18 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s)
return need_flush;
}
-static boolean
+static bool
nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)
{
struct nouveau_bo *txc = nvc0->screen->txc;
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
unsigned i;
- boolean need_flush = FALSE;
+ bool need_flush = false;
for (i = 0; i < nvc0->num_textures[s]; ++i) {
struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
struct nv04_resource *res;
- const boolean dirty = !!(nvc0->textures_dirty[s] & (1 << i));
+ const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));
if (!tic) {
nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
@@ -328,7 +328,7 @@ nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)
PUSH_DATA (push, 0x1001);
PUSH_DATAp(push, &tic->tic[0], 8);
- need_flush = TRUE;
+ need_flush = true;
} else
if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
@@ -356,16 +356,14 @@ nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)
void nvc0_validate_textures(struct nvc0_context *nvc0)
{
- boolean need_flush;
-
- if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
- need_flush = nve4_validate_tic(nvc0, 0);
- need_flush |= nve4_validate_tic(nvc0, 3);
- need_flush |= nve4_validate_tic(nvc0, 4);
- } else {
- need_flush = nvc0_validate_tic(nvc0, 0);
- need_flush |= nvc0_validate_tic(nvc0, 3);
- need_flush |= nvc0_validate_tic(nvc0, 4);
+ bool need_flush = false;
+ int i;
+
+ for (i = 0; i < 5; i++) {
+ if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
+ need_flush |= nve4_validate_tic(nvc0, i);
+ else
+ need_flush |= nvc0_validate_tic(nvc0, i);
}
if (need_flush) {
@@ -374,14 +372,14 @@ void nvc0_validate_textures(struct nvc0_context *nvc0)
}
}
-static boolean
+static bool
nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
{
uint32_t commands[16];
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
unsigned i;
unsigned n = 0;
- boolean need_flush = FALSE;
+ bool need_flush = false;
for (i = 0; i < nvc0->num_samplers[s]; ++i) {
struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);
@@ -398,7 +396,7 @@ nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->txc,
65536 + tsc->id * 32, NV_VRAM_DOMAIN(&nvc0->screen->base),
32, tsc->tsc);
- need_flush = TRUE;
+ need_flush = true;
}
nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
@@ -418,13 +416,13 @@ nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
return need_flush;
}
-boolean
+bool
nve4_validate_tsc(struct nvc0_context *nvc0, int s)
{
struct nouveau_bo *txc = nvc0->screen->txc;
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
unsigned i;
- boolean need_flush = FALSE;
+ bool need_flush = false;
for (i = 0; i < nvc0->num_samplers[s]; ++i) {
struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);
@@ -447,7 +445,7 @@ nve4_validate_tsc(struct nvc0_context *nvc0, int s)
PUSH_DATA (push, 0x1001);
PUSH_DATAp(push, &tsc->tsc[0], 8);
- need_flush = TRUE;
+ need_flush = true;
}
nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
@@ -466,16 +464,14 @@ nve4_validate_tsc(struct nvc0_context *nvc0, int s)
void nvc0_validate_samplers(struct nvc0_context *nvc0)
{
- boolean need_flush;
-
- if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
- need_flush = nve4_validate_tsc(nvc0, 0);
- need_flush |= nve4_validate_tsc(nvc0, 3);
- need_flush |= nve4_validate_tsc(nvc0, 4);
- } else {
- need_flush = nvc0_validate_tsc(nvc0, 0);
- need_flush |= nvc0_validate_tsc(nvc0, 3);
- need_flush |= nvc0_validate_tsc(nvc0, 4);
+ bool need_flush = false;
+ int i;
+
+ for (i = 0; i < 5; i++) {
+ if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
+ need_flush |= nve4_validate_tsc(nvc0, i);
+ else
+ need_flush |= nvc0_validate_tsc(nvc0, i);
}
if (need_flush) {
@@ -645,13 +641,13 @@ nve4_set_surface_info(struct nouveau_pushbuf *push,
}
}
-static INLINE void
+static inline void
nvc0_update_surface_bindings(struct nvc0_context *nvc0)
{
/* TODO */
}
-static INLINE void
+static inline void
nve4_update_surface_bindings(struct nvc0_context *nvc0)
{
/* TODO */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
index 45c6f7cc3ca..7cc5b4b1f48 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
@@ -329,17 +329,17 @@ nve4_m2mf_copy_linear(struct nouveau_context *nv,
}
-static INLINE boolean
+static inline bool
nvc0_mt_transfer_can_map_directly(struct nv50_miptree *mt)
{
if (mt->base.domain == NOUVEAU_BO_VRAM)
- return FALSE;
+ return false;
if (mt->base.base.usage != PIPE_USAGE_STAGING)
- return FALSE;
+ return false;
return !nouveau_bo_memtype(mt->base.bo);
}
-static INLINE boolean
+static inline bool
nvc0_mt_sync(struct nvc0_context *nvc0, struct nv50_miptree *mt, unsigned usage)
{
if (!mt->base.mm) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
index 8cf2584b0ce..6f9e7906713 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -61,8 +61,8 @@ nvc0_vertex_state_create(struct pipe_context *pipe,
so->num_elements = num_elements;
so->instance_elts = 0;
so->instance_bufs = 0;
- so->shared_slots = FALSE;
- so->need_conversion = FALSE;
+ so->shared_slots = false;
+ so->need_conversion = false;
memset(so->vb_access_size, 0, sizeof(so->vb_access_size));
@@ -93,7 +93,7 @@ nvc0_vertex_state_create(struct pipe_context *pipe,
return NULL;
}
so->element[i].state = nvc0_format_table[fmt].vtx;
- so->need_conversion = TRUE;
+ so->need_conversion = true;
}
size = util_format_get_blocksize(fmt);
@@ -141,7 +141,7 @@ nvc0_vertex_state_create(struct pipe_context *pipe,
if (so->instance_elts || src_offset_max >= (1 << 14))
return so;
- so->shared_slots = TRUE;
+ so->shared_slots = true;
for (i = 0; i < num_elements; ++i) {
const unsigned b = elements[i].vertex_buffer_index;
@@ -196,7 +196,7 @@ nvc0_set_constant_vertex_attrib(struct nvc0_context *nvc0, const unsigned a)
push->cur += 5;
}
-static INLINE void
+static inline void
nvc0_user_vbuf_range(struct nvc0_context *nvc0, int vbi,
uint32_t *base, uint32_t *size)
{
@@ -214,7 +214,7 @@ nvc0_user_vbuf_range(struct nvc0_context *nvc0, int vbi,
}
}
-static INLINE void
+static inline void
nvc0_release_user_vbufs(struct nvc0_context *nvc0)
{
if (nvc0->vbo_user) {
@@ -265,7 +265,7 @@ nvc0_update_user_vbufs(struct nvc0_context *nvc0)
PUSH_DATAh(push, address[b] + ve->src_offset);
PUSH_DATA (push, address[b] + ve->src_offset);
}
- nvc0->base.vbo_dirty = TRUE;
+ nvc0->base.vbo_dirty = true;
}
static void
@@ -419,7 +419,7 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0)
uint32_t const_vbos;
unsigned i;
uint8_t vbo_mode;
- boolean update_vertex;
+ bool update_vertex;
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX);
@@ -529,7 +529,7 @@ nvc0_idxbuf_validate(struct nvc0_context *nvc0)
#define NVC0_PRIM_GL_CASE(n) \
case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
-static INLINE unsigned
+static inline unsigned
nvc0_prim_gl(unsigned prim)
{
switch (prim) {
@@ -547,8 +547,7 @@ nvc0_prim_gl(unsigned prim)
NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
- /*
- NVC0_PRIM_GL_CASE(PATCHES); */
+ NVC0_PRIM_GL_CASE(PATCHES);
default:
return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
}
@@ -559,7 +558,7 @@ nvc0_draw_vbo_kick_notify(struct nouveau_pushbuf *push)
{
struct nvc0_screen *screen = push->user_priv;
- nouveau_fence_update(&screen->base, TRUE);
+ nouveau_fence_update(&screen->base, true);
NOUVEAU_DRV_STAT(&screen->base, pushbuf_count, 1);
}
@@ -695,7 +694,7 @@ nvc0_draw_elements_inline_u32_short(struct nouveau_pushbuf *push,
}
static void
-nvc0_draw_elements(struct nvc0_context *nvc0, boolean shorten,
+nvc0_draw_elements(struct nvc0_context *nvc0, bool shorten,
unsigned mode, unsigned start, unsigned count,
unsigned instance_count, int32_t index_bias)
{
@@ -835,8 +834,8 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
buf->bo, offset, NVC0_IB_ENTRY_1_NO_PREFETCH | size);
}
-static INLINE void
-nvc0_update_prim_restart(struct nvc0_context *nvc0, boolean en, uint32_t index)
+static inline void
+nvc0_update_prim_restart(struct nvc0_context *nvc0, bool en, uint32_t index)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
@@ -889,6 +888,12 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
}
}
+ if (info->mode == PIPE_PRIM_PATCHES &&
+ nvc0->state.patch_vertices != info->vertices_per_patch) {
+ nvc0->state.patch_vertices = info->vertices_per_patch;
+ IMMED_NVC0(push, NVC0_3D(PATCH_VERTICES), nvc0->state.patch_vertices);
+ }
+
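
PATCH_VERTICES only needs re-emitting when the patch size actually changes, so the value is cached in nvc0->state and compared before the method is pushed. The same redundant-set elision as a standalone sketch:

#include <stdio.h>

struct hw_state { unsigned patch_vertices; };

static void set_patch_vertices(struct hw_state *hw, unsigned n)
{
    if (hw->patch_vertices == n)
        return;  /* state cache hit: skip the method */
    hw->patch_vertices = n;
    printf("emit PATCH_VERTICES = %u\n", n);
}

int main(void)
{
    struct hw_state hw = { 0 };
    set_patch_vertices(&hw, 3);  /* emitted */
    set_patch_vertices(&hw, 3);  /* elided */
    set_patch_vertices(&hw, 4);  /* emitted */
    return 0;
}
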
/* 8 as minimum to avoid immediate double validation of new buffers */
nvc0_state_validate(nvc0, ~0, 8);
@@ -910,13 +915,13 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
continue;
if (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
- nvc0->cb_dirty = TRUE;
+ nvc0->cb_dirty = true;
}
}
if (nvc0->cb_dirty) {
IMMED_NVC0(push, NVC0_3D(MEM_BARRIER), 0x1011);
- nvc0->cb_dirty = FALSE;
+ nvc0->cb_dirty = false;
}
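
The cb_dirty flag set when a UBO is bound (earlier hunk) is consumed here: one MEM_BARRIER flushes the constant cache, the flag is cleared, and subsequent draws without a rebind skip the barrier. The deferred-flush pattern in isolation:

#include <stdbool.h>
#include <stdio.h>

static bool cb_dirty;

static void bind_ubo(void) { cb_dirty = true; }

static void draw(void)
{
    if (cb_dirty) {
        puts("emit MEM_BARRIER (flush constant cache)");
        cb_dirty = false;  /* the next draw skips the barrier */
    }
    puts("emit draw");
}

int main(void)
{
    bind_ubo();
    draw();  /* barrier + draw */
    draw();  /* draw only */
    return 0;
}
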
if (nvc0->state.vbo_mode) {
@@ -940,19 +945,19 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
if (!nvc0->vtxbuf[i].buffer)
continue;
if (nvc0->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
- nvc0->base.vbo_dirty = TRUE;
+ nvc0->base.vbo_dirty = true;
}
if (!nvc0->base.vbo_dirty && nvc0->idxbuf.buffer &&
nvc0->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
- nvc0->base.vbo_dirty = TRUE;
+ nvc0->base.vbo_dirty = true;
nvc0_update_prim_restart(nvc0, info->primitive_restart, info->restart_index);
if (nvc0->base.vbo_dirty) {
if (nvc0->screen->eng3d->oclass < GM107_3D_CLASS)
IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0);
- nvc0->base.vbo_dirty = FALSE;
+ nvc0->base.vbo_dirty = false;
}
if (unlikely(info->indirect)) {
@@ -962,10 +967,10 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
nvc0_draw_stream_output(nvc0, info);
} else
if (info->indexed) {
- boolean shorten = info->max_index <= 65535;
+ bool shorten = info->max_index <= 65535;
if (info->primitive_restart && info->restart_index > 65535)
- shorten = FALSE;
+ shorten = false;
nvc0_draw_elements(nvc0, shorten,
info->mode, info->start, info->count,
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
index f180087161d..8b23a4887da 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
@@ -21,12 +21,12 @@ struct push_context {
uint32_t restart_index;
uint32_t instance_id;
- boolean prim_restart;
- boolean need_vertex_id;
+ bool prim_restart;
+ bool need_vertex_id;
struct {
- boolean enabled;
- boolean value;
+ bool enabled;
+ bool value;
unsigned stride;
const uint8_t *data;
} edgeflag;
@@ -47,7 +47,7 @@ nvc0_push_context_init(struct nvc0_context *nvc0, struct push_context *ctx)
ctx->need_vertex_id =
nvc0->vertprog->vp.need_vertex_id && (nvc0->vertex->num_elements < 32);
- ctx->edgeflag.value = TRUE;
+ ctx->edgeflag.value = true;
ctx->edgeflag.enabled = nvc0->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS;
/* silence warnings */
@@ -55,7 +55,7 @@ nvc0_push_context_init(struct nvc0_context *nvc0, struct push_context *ctx)
ctx->edgeflag.stride = 0;
}
-static INLINE void
+static inline void
nvc0_vertex_configure_translate(struct nvc0_context *nvc0, int32_t index_bias)
{
struct translate *translate = nvc0->vertex->translate;
@@ -78,7 +78,7 @@ nvc0_vertex_configure_translate(struct nvc0_context *nvc0, int32_t index_bias)
}
}
-static INLINE void
+static inline void
nvc0_push_map_idxbuf(struct push_context *ctx, struct nvc0_context *nvc0)
{
if (nvc0->idxbuf.buffer) {
@@ -90,7 +90,7 @@ nvc0_push_map_idxbuf(struct push_context *ctx, struct nvc0_context *nvc0)
}
}
-static INLINE void
+static inline void
nvc0_push_map_edgeflag(struct push_context *ctx, struct nvc0_context *nvc0,
int32_t index_bias)
{
@@ -112,7 +112,7 @@ nvc0_push_map_edgeflag(struct push_context *ctx, struct nvc0_context *nvc0,
ctx->edgeflag.data += (intptr_t)index_bias * vb->stride;
}
-static INLINE unsigned
+static inline unsigned
prim_restart_search_i08(const uint8_t *elts, unsigned push, uint8_t index)
{
unsigned i;
@@ -120,7 +120,7 @@ prim_restart_search_i08(const uint8_t *elts, unsigned push, uint8_t index)
return i;
}
-static INLINE unsigned
+static inline unsigned
prim_restart_search_i16(const uint16_t *elts, unsigned push, uint16_t index)
{
unsigned i;
@@ -128,7 +128,7 @@ prim_restart_search_i16(const uint16_t *elts, unsigned push, uint16_t index)
return i;
}
-static INLINE unsigned
+static inline unsigned
prim_restart_search_i32(const uint32_t *elts, unsigned push, uint32_t index)
{
unsigned i;
@@ -136,21 +136,21 @@ prim_restart_search_i32(const uint32_t *elts, unsigned push, uint32_t index)
return i;
}
-static INLINE boolean
+static inline bool
ef_value(const struct push_context *ctx, uint32_t index)
{
float *pf = (float *)&ctx->edgeflag.data[index * ctx->edgeflag.stride];
- return *pf ? TRUE : FALSE;
+ return *pf ? true : false;
}
-static INLINE boolean
+static inline bool
ef_toggle(struct push_context *ctx)
{
ctx->edgeflag.value = !ctx->edgeflag.value;
return ctx->edgeflag.value;
}
-static INLINE unsigned
+static inline unsigned
ef_toggle_search_i08(struct push_context *ctx, const uint8_t *elts, unsigned n)
{
unsigned i;
@@ -158,7 +158,7 @@ ef_toggle_search_i08(struct push_context *ctx, const uint8_t *elts, unsigned n)
return i;
}
-static INLINE unsigned
+static inline unsigned
ef_toggle_search_i16(struct push_context *ctx, const uint16_t *elts, unsigned n)
{
unsigned i;
@@ -166,7 +166,7 @@ ef_toggle_search_i16(struct push_context *ctx, const uint16_t *elts, unsigned n)
return i;
}
-static INLINE unsigned
+static inline unsigned
ef_toggle_search_i32(struct push_context *ctx, const uint32_t *elts, unsigned n)
{
unsigned i;
@@ -174,7 +174,7 @@ ef_toggle_search_i32(struct push_context *ctx, const uint32_t *elts, unsigned n)
return i;
}
-static INLINE unsigned
+static inline unsigned
ef_toggle_search_seq(struct push_context *ctx, unsigned start, unsigned n)
{
unsigned i;
@@ -182,7 +182,7 @@ ef_toggle_search_seq(struct push_context *ctx, unsigned start, unsigned n)
return i;
}
-static INLINE void *
+static inline void *
nvc0_push_setup_vertex_array(struct nvc0_context *nvc0, const unsigned count)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
@@ -409,7 +409,7 @@ disp_vertices_seq(struct push_context *ctx, unsigned start, unsigned count)
#define NVC0_PRIM_GL_CASE(n) \
case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
-static INLINE unsigned
+static inline unsigned
nvc0_prim_gl(unsigned prim)
{
switch (prim) {
@@ -427,8 +427,7 @@ nvc0_prim_gl(unsigned prim)
NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
- /*
- NVC0_PRIM_GL_CASE(PATCHES); */
+ NVC0_PRIM_GL_CASE(PATCHES);
default:
return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
}
@@ -483,7 +482,7 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
struct pipe_context *pipe = &nvc0->base.pipe;
struct nvc0_so_target *targ;
targ = nvc0_so_target(info->count_from_stream_output);
- pipe->get_query_result(pipe, targ->pq, TRUE, (void *)&vert_count);
+ pipe->get_query_result(pipe, targ->pq, true, (void *)&vert_count);
vert_count /= targ->stride;
}
ctx.idxbuf = NULL; /* shut up warnings */
@@ -560,7 +559,7 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
NOUVEAU_DRV_STAT(&nvc0->screen->base, draw_calls_fallback_count, 1);
}
-static INLINE void
+static inline void
copy_indices_u8(uint32_t *dst, const uint8_t *elts, uint32_t bias, unsigned n)
{
unsigned i;
@@ -568,7 +567,7 @@ copy_indices_u8(uint32_t *dst, const uint8_t *elts, uint32_t bias, unsigned n)
dst[i] = elts[i] + bias;
}
-static INLINE void
+static inline void
copy_indices_u16(uint32_t *dst, const uint16_t *elts, uint32_t bias, unsigned n)
{
unsigned i;
@@ -576,7 +575,7 @@ copy_indices_u16(uint32_t *dst, const uint16_t *elts, uint32_t bias, unsigned n)
dst[i] = elts[i] + bias;
}
-static INLINE void
+static inline void
copy_indices_u32(uint32_t *dst, const uint32_t *elts, uint32_t bias, unsigned n)
{
unsigned i;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h b/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h
index 725e889683f..4ea8ca3cfa2 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h
@@ -15,14 +15,14 @@
#endif
-static INLINE void
+static inline void
nv50_add_bufctx_resident_bo(struct nouveau_bufctx *bufctx, int bin,
unsigned flags, struct nouveau_bo *bo)
{
nouveau_bufctx_refn(bufctx, bin, bo, flags)->priv = NULL;
}
-static INLINE void
+static inline void
nvc0_add_resident(struct nouveau_bufctx *bufctx, int bin,
struct nv04_resource *res, unsigned flags)
{
@@ -38,7 +38,7 @@ nvc0_add_resident(struct nouveau_bufctx *bufctx, int bin,
#define BCTX_REFN(bctx, bin, res, acc) \
nvc0_add_resident(bctx, NVC0_BIND_##bin, res, NOUVEAU_BO_##acc)
-static INLINE void
+static inline void
PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags)
{
struct nouveau_pushbuf_refn ref = { bo, flags };
@@ -69,46 +69,46 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags)
#define NVC0_3D_SERIALIZE NV50_GRAPH_SERIALIZE
-static INLINE uint32_t
+static inline uint32_t
NVC0_FIFO_PKHDR_SQ(int subc, int mthd, unsigned size)
{
return 0x20000000 | (size << 16) | (subc << 13) | (mthd >> 2);
}
-static INLINE uint32_t
+static inline uint32_t
NVC0_FIFO_PKHDR_NI(int subc, int mthd, unsigned size)
{
return 0x60000000 | (size << 16) | (subc << 13) | (mthd >> 2);
}
-static INLINE uint32_t
+static inline uint32_t
NVC0_FIFO_PKHDR_IL(int subc, int mthd, uint16_t data)
{
assert(data < 0x2000);
return 0x80000000 | (data << 16) | (subc << 13) | (mthd >> 2);
}
-static INLINE uint32_t
+static inline uint32_t
NVC0_FIFO_PKHDR_1I(int subc, int mthd, unsigned size)
{
return 0xa0000000 | (size << 16) | (subc << 13) | (mthd >> 2);
}
-static INLINE uint8_t
+static inline uint8_t
nouveau_bo_memtype(const struct nouveau_bo *bo)
{
return bo->config.nvc0.memtype;
}
-static INLINE void
+static inline void
PUSH_DATAh(struct nouveau_pushbuf *push, uint64_t data)
{
*push->cur++ = (uint32_t)(data >> 32);
}
-static INLINE void
+static inline void
BEGIN_NVC0(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
{
#ifndef NVC0_PUSH_EXPLICIT_SPACE_CHECKING
@@ -117,7 +117,7 @@ BEGIN_NVC0(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
PUSH_DATA (push, NVC0_FIFO_PKHDR_SQ(subc, mthd, size));
}
-static INLINE void
+static inline void
BEGIN_NIC0(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
{
#ifndef NVC0_PUSH_EXPLICIT_SPACE_CHECKING
@@ -126,7 +126,7 @@ BEGIN_NIC0(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
PUSH_DATA (push, NVC0_FIFO_PKHDR_NI(subc, mthd, size));
}
-static INLINE void
+static inline void
BEGIN_1IC0(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
{
#ifndef NVC0_PUSH_EXPLICIT_SPACE_CHECKING
@@ -135,7 +135,7 @@ BEGIN_1IC0(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
PUSH_DATA (push, NVC0_FIFO_PKHDR_1I(subc, mthd, size));
}
-static INLINE void
+static inline void
IMMED_NVC0(struct nouveau_pushbuf *push, int subc, int mthd, uint16_t data)
{
#ifndef NVC0_PUSH_EXPLICIT_SPACE_CHECKING
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
index fce02a7cc57..d3e5676873e 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -250,7 +250,7 @@ nve4_compute_validate_surfaces(struct nvc0_context *nvc0)
static void
nve4_compute_validate_samplers(struct nvc0_context *nvc0)
{
- boolean need_flush = nve4_validate_tsc(nvc0, 5);
+ bool need_flush = nve4_validate_tsc(nvc0, 5);
if (need_flush) {
BEGIN_NVC0(nvc0->base.pushbuf, NVE4_COMPUTE(TSC_FLUSH), 1);
PUSH_DATA (nvc0->base.pushbuf, 0);
@@ -299,11 +299,11 @@ nve4_compute_set_tex_handles(struct nvc0_context *nvc0)
}
-static boolean
+static bool
nve4_compute_state_validate(struct nvc0_context *nvc0)
{
if (!nvc0_compute_validate_program(nvc0))
- return FALSE;
+ return false;
if (nvc0->dirty_cp & NVC0_NEW_CP_TEXTURES)
nve4_compute_validate_textures(nvc0);
if (nvc0->dirty_cp & NVC0_NEW_CP_SAMPLERS)
@@ -316,15 +316,15 @@ nve4_compute_state_validate(struct nvc0_context *nvc0)
nvc0_validate_global_residents(nvc0,
nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL);
- nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, FALSE);
+ nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, false);
nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_cp);
if (unlikely(nouveau_pushbuf_validate(nvc0->base.pushbuf)))
- return FALSE;
+ return false;
if (unlikely(nvc0->state.flushed))
- nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, TRUE);
+ nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, true);
- return TRUE;
+ return true;
}
@@ -364,7 +364,7 @@ nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input,
PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
}
-static INLINE uint8_t
+static inline uint8_t
nve4_compute_derive_cache_split(struct nvc0_context *nvc0, uint32_t shared_size)
{
if (shared_size > (32 << 10))
@@ -413,7 +413,7 @@ nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,
nve4_cp_launch_desc_set_cb(desc, 0, screen->parm, 0, NVE4_CP_INPUT_SIZE);
}
-static INLINE struct nve4_cp_launch_desc *
+static inline struct nve4_cp_launch_desc *
nve4_compute_alloc_launch_desc(struct nouveau_context *nv,
struct nouveau_bo **pbo, uint64_t *pgpuaddr)
{
@@ -505,7 +505,7 @@ nve4_compute_validate_textures(struct nvc0_context *nvc0)
for (i = 0; i < nvc0->num_textures[s]; ++i) {
struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
struct nv04_resource *res;
- const boolean dirty = !!(nvc0->textures_dirty[s] & (1 << i));
+ const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));
if (!tic) {
nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
@@ -575,18 +575,18 @@ nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *desc)
{
const uint32_t *data = (const uint32_t *)desc;
unsigned i;
- boolean zero = FALSE;
+ bool zero = false;
debug_printf("COMPUTE LAUNCH DESCRIPTOR:\n");
for (i = 0; i < sizeof(*desc); i += 4) {
if (data[i / 4]) {
debug_printf("[%x]: 0x%08x\n", i, data[i / 4]);
- zero = FALSE;
+ zero = false;
} else
if (!zero) {
debug_printf("...\n");
- zero = TRUE;
+ zero = true;
}
}
@@ -606,7 +606,7 @@ nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *desc)
for (i = 0; i < 8; ++i) {
uint64_t address;
uint32_t size = desc->cb[i].size;
- boolean valid = !!(desc->cb_mask & (1 << i));
+ bool valid = !!(desc->cb_mask & (1 << i));
address = ((uint64_t)desc->cb[i].address_h << 32) | desc->cb[i].address_l;
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
index 4d7af54d860..7364a68a579 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
@@ -68,7 +68,7 @@ struct nve4_cp_launch_desc
u32 unk48[16];
};
-static INLINE void
+static inline void
nve4_cp_launch_desc_init_default(struct nve4_cp_launch_desc *desc)
{
memset(desc, 0, sizeof(*desc));
@@ -78,7 +78,7 @@ nve4_cp_launch_desc_init_default(struct nve4_cp_launch_desc *desc)
desc->unk47_20 = 0x300;
}
-static INLINE void
+static inline void
nve4_cp_launch_desc_set_cb(struct nve4_cp_launch_desc *desc,
unsigned index,
struct nouveau_bo *bo,
@@ -96,7 +96,7 @@ nve4_cp_launch_desc_set_cb(struct nve4_cp_launch_desc *desc,
desc->cb_mask |= 1 << index;
}
-static INLINE void
+static inline void
nve4_cp_launch_desc_set_ctx_cb(struct nve4_cp_launch_desc *desc,
unsigned index,
const struct nvc0_constbuf *cb)
diff --git a/src/gallium/drivers/r300/Makefile.am b/src/gallium/drivers/r300/Makefile.am
index dd1a5ede19b..081f332683e 100644
--- a/src/gallium/drivers/r300/Makefile.am
+++ b/src/gallium/drivers/r300/Makefile.am
@@ -1,5 +1,3 @@
-AUTOMAKE_OPTIONS = subdir-objects
-
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
index baf05cea965..6ea8f24cc14 100644
--- a/src/gallium/drivers/r300/r300_blit.c
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -382,7 +382,7 @@ static void r300_clear(struct pipe_context* pipe,
r300_get_num_cs_end_dwords(r300);
/* Reserve CS space. */
- if (dwords > (RADEON_MAX_CMDBUF_DWORDS - r300->cs->cdw)) {
+ if (dwords > (r300->cs->max_dw - r300->cs->cdw)) {
r300_flush(&r300->context, RADEON_FLUSH_ASYNC, NULL);
}
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index c35aa3b24aa..8c24ad6d98a 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -94,6 +94,8 @@ static void r300_destroy_context(struct pipe_context* context)
if (r300->cs)
r300->rws->cs_destroy(r300->cs);
+ if (r300->ctx)
+ r300->rws->ctx_destroy(r300->ctx);
rc_destroy_regalloc_state(&r300->fs_regalloc_state);
@@ -382,7 +384,11 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
sizeof(struct pipe_transfer), 64,
UTIL_SLAB_SINGLETHREADED);
- r300->cs = rws->cs_create(rws, RING_GFX, r300_flush_callback, r300, NULL);
+ r300->ctx = rws->ctx_create(rws);
+ if (!r300->ctx)
+ goto fail;
+
+ r300->cs = rws->cs_create(r300->ctx, RING_GFX, r300_flush_callback, r300, NULL);
if (r300->cs == NULL)
goto fail;
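
Command streams are now created from a per-context winsys submission context rather than from the winsys directly, and destruction mirrors creation: the CS goes first, then its context. A sketch of that ownership chain with stub functions (illustrative names, not the real winsys API):

#include <stdlib.h>

struct ws_ctx { int dummy; };
struct ws_cs  { struct ws_ctx *ctx; };

/* Stubs standing in for the winsys entry points. */
static struct ws_ctx *ctx_create(void) { return calloc(1, sizeof(struct ws_ctx)); }
static void ctx_destroy(struct ws_ctx *c) { free(c); }
static struct ws_cs *cs_create(struct ws_ctx *c)
{
    struct ws_cs *cs = calloc(1, sizeof(*cs));
    if (cs)
        cs->ctx = c;  /* the CS hangs off its submission context */
    return cs;
}
static void cs_destroy(struct ws_cs *cs) { free(cs); }

int main(void)
{
    struct ws_ctx *ctx = ctx_create();  /* context first */
    if (!ctx)
        return 1;
    struct ws_cs *cs = cs_create(ctx);
    if (!cs) {
        ctx_destroy(ctx);
        return 1;
    }
    cs_destroy(cs);    /* teardown mirrors creation: */
    ctx_destroy(ctx);  /* CS before its context */
    return 0;
}
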
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 3873c9a31c1..18ae11a3a24 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -449,6 +449,8 @@ struct r300_context {
/* The interface to the windowing system, etc. */
struct radeon_winsys *rws;
+ /* The submission context. */
+ struct radeon_winsys_ctx *ctx;
/* The command stream. */
struct radeon_winsys_cs *cs;
/* Screen. */
@@ -647,32 +649,32 @@ struct r300_context {
for (atom = r300->first_dirty; atom != r300->last_dirty; atom++)
/* Convenience cast wrappers. */
-static INLINE struct r300_query* r300_query(struct pipe_query* q)
+static inline struct r300_query* r300_query(struct pipe_query* q)
{
return (struct r300_query*)q;
}
-static INLINE struct r300_surface* r300_surface(struct pipe_surface* surf)
+static inline struct r300_surface* r300_surface(struct pipe_surface* surf)
{
return (struct r300_surface*)surf;
}
-static INLINE struct r300_resource* r300_resource(struct pipe_resource* tex)
+static inline struct r300_resource* r300_resource(struct pipe_resource* tex)
{
return (struct r300_resource*)tex;
}
-static INLINE struct r300_context* r300_context(struct pipe_context* context)
+static inline struct r300_context* r300_context(struct pipe_context* context)
{
return (struct r300_context*)context;
}
-static INLINE struct r300_fragment_shader *r300_fs(struct r300_context *r300)
+static inline struct r300_fragment_shader *r300_fs(struct r300_context *r300)
{
return (struct r300_fragment_shader*)r300->fs.state;
}
-static INLINE void r300_mark_atom_dirty(struct r300_context *r300,
+static inline void r300_mark_atom_dirty(struct r300_context *r300,
struct r300_atom *atom)
{
atom->dirty = TRUE;
@@ -688,7 +690,7 @@ static INLINE void r300_mark_atom_dirty(struct r300_context *r300,
}
}
-static INLINE struct pipe_surface *
+static inline struct pipe_surface *
r300_get_nonnull_cb(struct pipe_framebuffer_state *fb, unsigned i)
{
if (fb->cbufs[i])
@@ -777,12 +779,12 @@ void r300_update_derived_state(struct r300_context* r300);
void r500_dump_rs_block(struct r300_rs_block *rs);
-static INLINE boolean CTX_DBG_ON(struct r300_context * ctx, unsigned flags)
+static inline boolean CTX_DBG_ON(struct r300_context * ctx, unsigned flags)
{
return SCREEN_DBG_ON(ctx->screen, flags);
}
-static INLINE void CTX_DBG(struct r300_context * ctx, unsigned flags,
+static inline void CTX_DBG(struct r300_context * ctx, unsigned flags,
const char * fmt, ...)
{
if (CTX_DBG_ON(ctx, flags)) {
diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h
index 37f9641ab3e..fc150542d4b 100644
--- a/src/gallium/drivers/r300/r300_cs.h
+++ b/src/gallium/drivers/r300/r300_cs.h
@@ -46,7 +46,7 @@
#ifdef DEBUG
#define BEGIN_CS(size) do { \
- assert(size <= (RADEON_MAX_CMDBUF_DWORDS - cs_copy->cdw)); \
+ assert(size <= (cs_copy->max_dw - cs_copy->cdw)); \
cs_count = size; \
} while (0)
diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h
index 39eb73da65d..b39624dad5f 100644
--- a/src/gallium/drivers/r300/r300_fs.h
+++ b/src/gallium/drivers/r300/r300_fs.h
@@ -77,14 +77,14 @@ void r300_shader_read_fs_inputs(struct tgsi_shader_info* info,
/* Return TRUE if the shader was switched and should be re-emitted. */
boolean r300_pick_fragment_shader(struct r300_context* r300);
-static INLINE boolean r300_fragment_shader_writes_depth(struct r300_fragment_shader *fs)
+static inline boolean r300_fragment_shader_writes_depth(struct r300_fragment_shader *fs)
{
if (!fs)
return FALSE;
return (fs->shader->code.writes_depth) ? TRUE : FALSE;
}
-static INLINE boolean r300_fragment_shader_writes_all(struct r300_fragment_shader *fs)
+static inline boolean r300_fragment_shader_writes_all(struct r300_fragment_shader *fs)
{
if (!fs)
return FALSE;
diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c
index 01b83b87fcf..4dd8156f616 100644
--- a/src/gallium/drivers/r300/r300_query.c
+++ b/src/gallium/drivers/r300/r300_query.c
@@ -146,10 +146,11 @@ static boolean r300_get_query_result(struct pipe_context* pipe,
if (q->type == PIPE_QUERY_GPU_FINISHED) {
if (wait) {
- r300->rws->buffer_wait(q->buf, RADEON_USAGE_READWRITE);
+ r300->rws->buffer_wait(q->buf, PIPE_TIMEOUT_INFINITE,
+ RADEON_USAGE_READWRITE);
vresult->b = TRUE;
} else {
- vresult->b = !r300->rws->buffer_is_busy(q->buf, RADEON_USAGE_READWRITE);
+ vresult->b = r300->rws->buffer_wait(q->buf, 0, RADEON_USAGE_READWRITE);
}
return vresult->b;
}
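
This hunk tracks a winsys API change that folds buffer_is_busy() into buffer_wait(): a timeout of 0 is a non-blocking poll that returns true only if the buffer is already idle, while PIPE_TIMEOUT_INFINITE blocks until it is. A sketch of the calling convention, with a stub in place of the real call:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TIMEOUT_INFINITE UINT64_MAX

/* Stub standing in for the winsys call: returns true once the buffer
 * is idle for the given usage, waiting at most timeout_ns. */
static bool buffer_wait(void *buf, uint64_t timeout_ns, unsigned usage)
{
    (void)buf; (void)timeout_ns; (void)usage;
    return true;
}

int main(void)
{
    void *buf = 0;

    /* the old buffer_is_busy(buf) is now !buffer_wait(buf, 0, ...) */
    bool busy = !buffer_wait(buf, 0, 0);
    printf("busy = %d\n", busy);

    /* and a blocking sync is buffer_wait with an infinite timeout */
    (void)buffer_wait(buf, TIMEOUT_INFINITE, 0);
    return 0;
}
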
@@ -168,8 +169,6 @@ static boolean r300_get_query_result(struct pipe_context* pipe,
map++;
}
- r300->rws->buffer_unmap(q->cs_buf);
-
if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE) {
vresult->b = temp != 0;
} else {
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 4c951d14f10..0487b11e775 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -215,7 +215,7 @@ static boolean r300_reserve_cs_dwords(struct r300_context *r300,
cs_dwords += r300_get_num_cs_end_dwords(r300);
/* Reserve requested CS space. */
- if (cs_dwords > (RADEON_MAX_CMDBUF_DWORDS - r300->cs->cdw)) {
+ if (cs_dwords > (r300->cs->max_dw - r300->cs->cdw)) {
r300_flush(&r300->context, RADEON_FLUSH_ASYNC, NULL);
flushed = TRUE;
}
@@ -871,7 +871,7 @@ struct r300_render {
uint8_t *vbo_ptr;
};
-static INLINE struct r300_render*
+static inline struct r300_render*
r300_render(struct vbuf_render* render)
{
return (struct r300_render*)render;
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index a7bca915f57..4ca0b268bde 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -191,6 +191,10 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
return 0;
/* SWTCL-only features. */
@@ -427,7 +431,7 @@ static int r300_get_video_param(struct pipe_screen *screen,
* Whether the format matches:
* PIPE_FORMAT_?10?10?10?2_UNORM
*/
-static INLINE boolean
+static inline boolean
util_format_is_rgba1010102_variant(const struct util_format_description *desc)
{
static const unsigned size[4] = {10, 10, 10, 2};
@@ -660,14 +664,6 @@ static void r300_fence_reference(struct pipe_screen *screen,
rws->fence_reference(ptr, fence);
}
-static boolean r300_fence_signalled(struct pipe_screen *screen,
- struct pipe_fence_handle *fence)
-{
- struct radeon_winsys *rws = r300_screen(screen)->rws;
-
- return rws->fence_wait(rws, fence, 0);
-}
-
static boolean r300_fence_finish(struct pipe_screen *screen,
struct pipe_fence_handle *fence,
uint64_t timeout)
@@ -712,7 +708,6 @@ struct pipe_screen* r300_screen_create(struct radeon_winsys *rws)
r300screen->screen.is_video_format_supported = vl_video_buffer_is_format_supported;
r300screen->screen.context_create = r300_create_context;
r300screen->screen.fence_reference = r300_fence_reference;
- r300screen->screen.fence_signalled = r300_fence_signalled;
r300screen->screen.fence_finish = r300_fence_finish;
r300_init_screen_resource_functions(r300screen);
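
fence_signalled was only ever a zero-timeout fence_finish, so the dedicated screen callback can be dropped and callers poll through the one entry point. The equivalence as a sketch:

#include <stdbool.h>
#include <stdint.h>

static bool fence_finish(void *fence, uint64_t timeout)
{
    (void)fence; (void)timeout;
    return true;  /* stub for the real wait */
}

/* What the removed callback boiled down to: */
static bool fence_signalled(void *fence)
{
    return fence_finish(fence, 0);  /* non-blocking poll */
}

int main(void) { return fence_signalled(0) ? 0 : 1; }
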
diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h
index 7bba39bf12b..e15c3c7de0c 100644
--- a/src/gallium/drivers/r300/r300_screen.h
+++ b/src/gallium/drivers/r300/r300_screen.h
@@ -51,11 +51,11 @@ struct r300_screen {
/* Convenience cast wrappers. */
-static INLINE struct r300_screen* r300_screen(struct pipe_screen* screen) {
+static inline struct r300_screen* r300_screen(struct pipe_screen* screen) {
return (struct r300_screen*)screen;
}
-static INLINE struct radeon_winsys *
+static inline struct radeon_winsys *
radeon_winsys(struct pipe_screen *screen) {
return r300_screen(screen)->rws;
}
@@ -102,12 +102,12 @@ radeon_winsys(struct pipe_screen *screen) {
#define DBG_P_STAT (1 << 25)
/*@}*/
-static INLINE boolean SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags)
+static inline boolean SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags)
{
return (screen->debug & flags) ? TRUE : FALSE;
}
-static INLINE void SCREEN_DBG(struct r300_screen * screen, unsigned flags,
+static inline void SCREEN_DBG(struct r300_screen * screen, unsigned flags,
const char * fmt, ...)
{
if (SCREEN_DBG_ON(screen, flags)) {
diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c
index de557b57776..6451a2c8df2 100644
--- a/src/gallium/drivers/r300/r300_screen_buffer.c
+++ b/src/gallium/drivers/r300/r300_screen_buffer.c
@@ -96,7 +96,7 @@ r300_buffer_transfer_map( struct pipe_context *context,
/* Check if mapping this buffer would cause waiting for the GPU. */
if (r300->rws->cs_is_buffer_referenced(r300->cs, rbuf->cs_buf, RADEON_USAGE_READWRITE) ||
- r300->rws->buffer_is_busy(rbuf->buf, RADEON_USAGE_READWRITE)) {
+ !r300->rws->buffer_wait(rbuf->buf, 0, RADEON_USAGE_READWRITE)) {
unsigned i;
struct pb_buffer *new_buf;
diff --git a/src/gallium/drivers/r300/r300_screen_buffer.h b/src/gallium/drivers/r300/r300_screen_buffer.h
index b4c8520039b..14b849c8c93 100644
--- a/src/gallium/drivers/r300/r300_screen_buffer.h
+++ b/src/gallium/drivers/r300/r300_screen_buffer.h
@@ -46,7 +46,7 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen,
/* Inline functions. */
-static INLINE struct r300_buffer *r300_buffer(struct pipe_resource *buffer)
+static inline struct r300_buffer *r300_buffer(struct pipe_resource *buffer)
{
return (struct r300_buffer *)buffer;
}
diff --git a/src/gallium/drivers/r300/r300_shader_semantics.h b/src/gallium/drivers/r300/r300_shader_semantics.h
index b756048c6c7..93bbc9d4a96 100644
--- a/src/gallium/drivers/r300/r300_shader_semantics.h
+++ b/src/gallium/drivers/r300/r300_shader_semantics.h
@@ -46,7 +46,7 @@ struct r300_shader_semantics {
int num_generic;
};
-static INLINE void r300_shader_semantics_reset(
+static inline void r300_shader_semantics_reset(
struct r300_shader_semantics* info)
{
int i;
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index e886df87a60..d99d5ae0152 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -844,7 +844,7 @@ static void r300_tex_set_tiling_flags(struct r300_context *r300,
tex->tex.macrotile[level]) {
r300->rws->buffer_set_tiling(tex->buf, r300->cs,
tex->tex.microtile, tex->tex.macrotile[level],
- 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0,
tex->tex.stride_in_bytes[0], false);
tex->surface_level = level;
diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
index feec494c4dc..fbd91cda9fe 100644
--- a/src/gallium/drivers/r300/r300_state_inlines.h
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -32,13 +32,13 @@
/* Some maths. These should probably find their way to u_math, if needed. */
-static INLINE int pack_float_16_6x(float f) {
+static inline int pack_float_16_6x(float f) {
return ((int)(f * 6.0) & 0xffff);
}
/* Blend state. */
-static INLINE uint32_t r300_translate_blend_function(int blend_func,
+static inline uint32_t r300_translate_blend_function(int blend_func,
boolean clamp)
{
switch (blend_func) {
@@ -60,7 +60,7 @@ static INLINE uint32_t r300_translate_blend_function(int blend_func,
return 0;
}
-static INLINE uint32_t r300_translate_blend_factor(int blend_fact)
+static inline uint32_t r300_translate_blend_factor(int blend_fact)
{
switch (blend_fact) {
case PIPE_BLENDFACTOR_ONE:
@@ -113,7 +113,7 @@ static INLINE uint32_t r300_translate_blend_factor(int blend_fact)
/* DSA state. */
-static INLINE uint32_t r300_translate_depth_stencil_function(int zs_func)
+static inline uint32_t r300_translate_depth_stencil_function(int zs_func)
{
switch (zs_func) {
case PIPE_FUNC_NEVER:
@@ -141,7 +141,7 @@ static INLINE uint32_t r300_translate_depth_stencil_function(int zs_func)
return 0;
}
-static INLINE uint32_t r300_translate_stencil_op(int s_op)
+static inline uint32_t r300_translate_stencil_op(int s_op)
{
switch (s_op) {
case PIPE_STENCIL_OP_KEEP:
@@ -168,7 +168,7 @@ static INLINE uint32_t r300_translate_stencil_op(int s_op)
return 0;
}
-static INLINE uint32_t r300_translate_alpha_function(int alpha_func)
+static inline uint32_t r300_translate_alpha_function(int alpha_func)
{
switch (alpha_func) {
case PIPE_FUNC_NEVER:
@@ -195,7 +195,7 @@ static INLINE uint32_t r300_translate_alpha_function(int alpha_func)
return 0;
}
-static INLINE uint32_t
+static inline uint32_t
r300_translate_polygon_mode_front(unsigned mode) {
switch (mode)
{
@@ -213,7 +213,7 @@ r300_translate_polygon_mode_front(unsigned mode) {
}
}
-static INLINE uint32_t
+static inline uint32_t
r300_translate_polygon_mode_back(unsigned mode) {
switch (mode)
{
@@ -233,7 +233,7 @@ r300_translate_polygon_mode_back(unsigned mode) {
/* Texture sampler state. */
-static INLINE uint32_t r300_translate_wrap(int wrap)
+static inline uint32_t r300_translate_wrap(int wrap)
{
switch (wrap) {
case PIPE_TEX_WRAP_REPEAT:
@@ -259,7 +259,7 @@ static INLINE uint32_t r300_translate_wrap(int wrap)
}
}
-static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip,
+static inline uint32_t r300_translate_tex_filters(int min, int mag, int mip,
boolean is_anisotropic)
{
uint32_t retval = 0;
@@ -308,7 +308,7 @@ static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip,
return retval;
}
-static INLINE uint32_t r300_anisotropy(unsigned max_aniso)
+static inline uint32_t r300_anisotropy(unsigned max_aniso)
{
if (max_aniso >= 16) {
return R300_TX_MAX_ANISO_16_TO_1;
@@ -323,7 +323,7 @@ static INLINE uint32_t r300_anisotropy(unsigned max_aniso)
}
}
-static INLINE uint32_t r500_anisotropy(unsigned max_aniso)
+static inline uint32_t r500_anisotropy(unsigned max_aniso)
{
if (!max_aniso) {
return 0;
@@ -336,7 +336,7 @@ static INLINE uint32_t r500_anisotropy(unsigned max_aniso)
}
/* Translate pipe_formats into PSC vertex types. */
-static INLINE uint16_t
+static inline uint16_t
r300_translate_vertex_data_type(enum pipe_format format) {
uint32_t result = 0;
const struct util_format_description *desc;
@@ -410,7 +410,7 @@ r300_translate_vertex_data_type(enum pipe_format format) {
return result;
}
-static INLINE uint16_t
+static inline uint16_t
r300_translate_vertex_data_swizzle(enum pipe_format format) {
const struct util_format_description *desc;
unsigned i, swizzle = 0;
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 6c01c0d21e4..5e4d50df27d 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -1063,7 +1063,7 @@ r300_texture_create_object(struct r300_screen *rscreen,
rws->buffer_set_tiling(tex->buf, NULL,
tex->tex.microtile, tex->tex.macrotile[0],
- 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0,
tex->tex.stride_in_bytes[0], false);
return tex;
diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c
index b87164ba836..44303792f51 100644
--- a/src/gallium/drivers/r300/r300_transfer.c
+++ b/src/gallium/drivers/r300/r300_transfer.c
@@ -41,7 +41,7 @@ struct r300_transfer {
};
/* Convenience cast wrapper. */
-static INLINE struct r300_transfer*
+static inline struct r300_transfer*
r300_transfer(struct pipe_transfer* transfer)
{
return (struct r300_transfer*)transfer;
@@ -120,7 +120,7 @@ r300_texture_transfer_map(struct pipe_context *ctx,
referenced_hw = TRUE;
} else {
referenced_hw =
- r300->rws->buffer_is_busy(tex->buf, RADEON_USAGE_READWRITE);
+ !r300->rws->buffer_wait(tex->buf, 0, RADEON_USAGE_READWRITE);
}
trans = CALLOC_STRUCT(r300_transfer);
@@ -251,16 +251,12 @@ void r300_texture_transfer_unmap(struct pipe_context *ctx,
struct r300_resource *tex = r300_resource(transfer->resource);
if (trans->linear_texture) {
- rws->buffer_unmap(trans->linear_texture->cs_buf);
-
if (transfer->usage & PIPE_TRANSFER_WRITE) {
r300_copy_into_tiled_texture(ctx, trans);
}
pipe_resource_reference(
(struct pipe_resource**)&trans->linear_texture, NULL);
- } else {
- rws->buffer_unmap(tex->cs_buf);
}
FREE(transfer);
}
diff --git a/src/gallium/drivers/r600/Makefile.am b/src/gallium/drivers/r600/Makefile.am
index dc0d90d759b..8317da727a2 100644
--- a/src/gallium/drivers/r600/Makefile.am
+++ b/src/gallium/drivers/r600/Makefile.am
@@ -1,5 +1,3 @@
-AUTOMAKE_OPTIONS = subdir-objects
-
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index 295cb4d80b7..42e8b0b1761 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -160,6 +160,9 @@ int egcm_load_index_reg(struct r600_bytecode *bc, unsigned id, bool inside_alu_c
alu.op = ALU_OP1_MOVA_INT;
alu.src[0].sel = bc->index_reg[id];
alu.src[0].chan = 0;
+ if (bc->chip_class == CAYMAN)
+ alu.dst.sel = id == 0 ? CM_V_SQ_MOVA_DST_CF_IDX0 : CM_V_SQ_MOVA_DST_CF_IDX1;
+
alu.last = 1;
r = r600_bytecode_add_alu(bc, &alu);
if (r)
@@ -167,12 +170,14 @@ int egcm_load_index_reg(struct r600_bytecode *bc, unsigned id, bool inside_alu_c
bc->ar_loaded = 0; /* clobbered */
- memset(&alu, 0, sizeof(alu));
- alu.op = id == 0 ? ALU_OP0_SET_CF_IDX0 : ALU_OP0_SET_CF_IDX1;
- alu.last = 1;
- r = r600_bytecode_add_alu(bc, &alu);
- if (r)
- return r;
+ if (bc->chip_class == EVERGREEN) {
+ memset(&alu, 0, sizeof(alu));
+ alu.op = id == 0 ? ALU_OP0_SET_CF_IDX0 : ALU_OP0_SET_CF_IDX1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(bc, &alu);
+ if (r)
+ return r;
+ }
/* Must split ALU group as index only applies to following group */
if (inside_alu_clause) {
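
Evergreen loads a CF index register in two steps, MOVA_INT into AR followed by a dedicated SET_CF_IDXn op, while Cayman encodes the target directly in MOVA_INT's destination field using the CM_V_SQ_MOVA_DST_* values defined in the next hunk. The split, condensed into a runnable sketch:

#include <stdio.h>

enum chip { EVERGREEN, CAYMAN };

/* Cayman MOVA_INT destination encodings (from eg_sq.h below) */
enum { DST_AR_X = 0, DST_CF_PC = 1, DST_CF_IDX0 = 2, DST_CF_IDX1 = 3 };

static void load_index_reg(enum chip cc, unsigned id)
{
    if (cc == CAYMAN) {
        /* one op: MOVA_INT writes the CF index register directly */
        printf("MOVA_INT dst=%d\n", id == 0 ? DST_CF_IDX0 : DST_CF_IDX1);
    } else {
        /* two ops: MOVA_INT to AR, then copy AR into CF_IDXn */
        printf("MOVA_INT dst=%d\n", DST_AR_X);
        printf("SET_CF_IDX%u\n", id);
    }
}

int main(void)
{
    load_index_reg(EVERGREEN, 0);
    load_index_reg(CAYMAN, 0);
    return 0;
}
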
diff --git a/src/gallium/drivers/r600/eg_sq.h b/src/gallium/drivers/r600/eg_sq.h
index b534872f062..97e230f56c7 100644
--- a/src/gallium/drivers/r600/eg_sq.h
+++ b/src/gallium/drivers/r600/eg_sq.h
@@ -521,4 +521,11 @@
#define V_SQ_REL_ABSOLUTE 0
#define V_SQ_REL_RELATIVE 1
+
+/* CAYMAN has special encoding for MOVA_INT destination */
+#define CM_V_SQ_MOVA_DST_AR_X 0
+#define CM_V_SQ_MOVA_DST_CF_PC 1
+#define CM_V_SQ_MOVA_DST_CF_IDX0 2
+#define CM_V_SQ_MOVA_DST_CF_IDX1 3
+
#endif
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index 4c3c34cd664..c52e43e9c2a 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -163,7 +163,7 @@ static void evergreen_cs_set_vertex_buffer(
rctx->b.flags |= R600_CONTEXT_INV_VERTEX_CACHE;
state->enabled_mask |= 1 << vb_index;
state->dirty_mask |= 1 << vb_index;
- state->atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &state->atom);
}
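
Funneling the scattered atom.dirty = true writes through r600_mark_atom_dirty puts any extra bookkeeping in one place; the diff does not show the helper's body, so the dirty-mask tracking below is an assumption about what such a helper typically centralizes:

#include <stdbool.h>
#include <stdint.h>

struct atom    { bool dirty; unsigned id; };
struct context { uint64_t dirty_mask; };

/* One helper instead of scattered direct writes: the flag and any
 * side bookkeeping (here, a mask of dirty atoms) stay in sync. */
static inline void mark_atom_dirty(struct context *ctx, struct atom *a)
{
    a->dirty = true;
    ctx->dirty_mask |= UINT64_C(1) << a->id;
}

int main(void)
{
    struct context ctx = { 0 };
    struct atom alphatest = { false, 3 };

    mark_atom_dirty(&ctx, &alphatest);
    return ctx.dirty_mask == (UINT64_C(1) << 3) ? 0 : 1;
}
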
static void evergreen_cs_set_constant_buffer(
@@ -226,7 +226,7 @@ void *evergreen_create_compute_state(
}
#else
memset(&shader->binary, 0, sizeof(shader->binary));
- radeon_elf_read(code, header->num_bytes, &shader->binary, true);
+ radeon_elf_read(code, header->num_bytes, &shader->binary);
r600_create_shader(&shader->bc, &shader->binary, &use_kill);
shader->code_bo = r600_compute_buffer_alloc_vram(ctx->screen,
@@ -487,6 +487,12 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
/* Emit constant buffer state */
r600_emit_atom(ctx, &ctx->constbuf_state[PIPE_SHADER_COMPUTE].atom);
+ /* Emit sampler state */
+ r600_emit_atom(ctx, &ctx->samplers[PIPE_SHADER_COMPUTE].states.atom);
+
+ /* Emit sampler view (texture resource) state */
+ r600_emit_atom(ctx, &ctx->samplers[PIPE_SHADER_COMPUTE].views.atom);
+
/* Emit compute shader state */
r600_emit_atom(ctx, &ctx->cs_shader_state.atom);
@@ -655,25 +661,6 @@ static void evergreen_set_compute_resources(struct pipe_context * ctx_,
}
}
-void evergreen_set_cs_sampler_view(struct pipe_context *ctx_,
- unsigned start_slot, unsigned count,
- struct pipe_sampler_view **views)
-{
- struct r600_pipe_sampler_view **resource =
- (struct r600_pipe_sampler_view **)views;
-
- for (unsigned i = 0; i < count; i++) {
- if (resource[i]) {
- assert(i+1 < 12);
- /* XXX: Implement */
- assert(!"Compute samplers not implemented.");
- ///FETCH0 = VTX0 (param buffer),
- //FETCH1 = VTX1 (global buffer pool), FETCH2... = TEX
- }
- }
-}
-
-
static void evergreen_set_global_binding(
struct pipe_context *ctx_, unsigned first, unsigned n,
struct pipe_resource **resources,
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 4ddbc0beba5..6a91d4709f4 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -32,7 +32,7 @@
#include "evergreen_compute.h"
#include "util/u_math.h"
-static INLINE unsigned evergreen_array_mode(unsigned mode)
+static inline unsigned evergreen_array_mode(unsigned mode)
{
switch (mode) {
case RADEON_SURF_MODE_LINEAR_ALIGNED: return V_028C70_ARRAY_LINEAR_ALIGNED;
@@ -485,7 +485,7 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx,
/* offset */
rs->offset_units = state->offset_units;
- rs->offset_scale = state->offset_scale * 12.0f;
+ rs->offset_scale = state->offset_scale * 16.0f;
rs->offset_enable = state->offset_point || state->offset_line || state->offset_tri;
if (state->point_size_per_vertex) {
@@ -896,7 +896,7 @@ static void evergreen_set_scissor_states(struct pipe_context *ctx,
for (i = start_slot; i < start_slot + num_scissors; i++) {
rctx->scissor[i].scissor = state[i - start_slot];
- rctx->scissor[i].atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->scissor[i].atom);
}
}
@@ -1028,7 +1028,10 @@ void evergreen_init_color_surface(struct r600_context *rctx,
macro_aspect = rtex->surface.mtilea;
bankw = rtex->surface.bankw;
bankh = rtex->surface.bankh;
- fmask_bankh = rtex->fmask.bank_height;
+ if (rtex->fmask.size)
+ fmask_bankh = rtex->fmask.bank_height;
+ else
+ fmask_bankh = rtex->surface.bankh;
tile_split = eg_tile_split(tile_split);
macro_aspect = eg_macro_tile_aspect(macro_aspect);
bankw = eg_bank_wh(bankw);
@@ -1149,10 +1152,11 @@ void evergreen_init_color_surface(struct r600_context *rctx,
surf->cb_color_attrib = color_attrib;
if (rtex->fmask.size) {
surf->cb_color_fmask = (base_offset + rtex->fmask.offset) >> 8;
+ surf->cb_color_fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max);
} else {
surf->cb_color_fmask = surf->cb_color_base;
+ surf->cb_color_fmask_slice = S_028C88_TILE_MAX(slice);
}
- surf->cb_color_fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max);
surf->color_initialized = true;
}
@@ -1342,11 +1346,11 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
if (rctx->alphatest_state.bypass != alphatest_bypass) {
rctx->alphatest_state.bypass = alphatest_bypass;
- rctx->alphatest_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom);
}
if (rctx->alphatest_state.cb0_export_16bpc != export_16bpc) {
rctx->alphatest_state.cb0_export_16bpc = export_16bpc;
- rctx->alphatest_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom);
}
}
@@ -1362,28 +1366,28 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
if (state->zsbuf->format != rctx->poly_offset_state.zs_format) {
rctx->poly_offset_state.zs_format = state->zsbuf->format;
- rctx->poly_offset_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->poly_offset_state.atom);
}
if (rctx->db_state.rsurf != surf) {
rctx->db_state.rsurf = surf;
- rctx->db_state.atom.dirty = true;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_state.atom);
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
} else if (rctx->db_state.rsurf) {
rctx->db_state.rsurf = NULL;
- rctx->db_state.atom.dirty = true;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_state.atom);
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
if (rctx->cb_misc_state.nr_cbufs != state->nr_cbufs) {
rctx->cb_misc_state.nr_cbufs = state->nr_cbufs;
- rctx->cb_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
}
if (state->nr_cbufs == 0 && rctx->alphatest_state.bypass) {
rctx->alphatest_state.bypass = false;
- rctx->alphatest_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom);
}
log_samples = util_logbase2(rctx->framebuffer.nr_samples);
@@ -1392,7 +1396,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
rctx->b.family == CHIP_RV770) &&
rctx->db_misc_state.log_samples != log_samples) {
rctx->db_misc_state.log_samples = log_samples;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
@@ -1420,7 +1424,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
rctx->framebuffer.atom.num_dw += 4;
}
- rctx->framebuffer.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom);
r600_set_sample_locations_constant_buffer(rctx);
}
@@ -1434,7 +1438,7 @@ static void evergreen_set_min_samples(struct pipe_context *ctx, unsigned min_sam
rctx->ps_iter_samples = min_samples;
if (rctx->framebuffer.nr_samples > 1) {
- rctx->framebuffer.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom);
}
}
@@ -1732,10 +1736,10 @@ static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_
r600_write_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2);
radeon_emit(cs, a->blend_colormask & fb_colormask); /* R_028238_CB_TARGET_MASK */
- /* Always enable the first colorbuffer in CB_SHADER_MASK. This
- * will assure that the alpha-test will work even if there is
- * no colorbuffer bound. */
- radeon_emit(cs, 0xf | (a->dual_src_blend ? ps_colormask : 0) | fb_colormask); /* R_02823C_CB_SHADER_MASK */
+ /* This must exactly match the export instructions used by the shader.
+ * Other values may lead to undefined behavior and hangs.
+ */
+ radeon_emit(cs, ps_colormask); /* R_02823C_CB_SHADER_MASK */
}
static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom *atom)
@@ -1980,7 +1984,7 @@ static void evergreen_emit_cs_constant_buffers(struct r600_context *rctx, struct
static void evergreen_emit_sampler_views(struct r600_context *rctx,
struct r600_samplerview_state *state,
- unsigned resource_id_base)
+ unsigned resource_id_base, unsigned pkt_flags)
{
struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
uint32_t dirty_mask = state->dirty_mask;
@@ -1993,7 +1997,7 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx,
rview = state->views[resource_index];
assert(rview);
- radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0));
+ radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
radeon_emit(cs, (resource_id_base + resource_index) * 8);
radeon_emit_array(cs, rview->tex_resource_words, 8);
@@ -2002,11 +2006,11 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx,
rview->tex_resource->b.b.nr_samples > 1 ?
RADEON_PRIO_SHADER_TEXTURE_MSAA :
RADEON_PRIO_SHADER_TEXTURE_RO);
- radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+ radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
radeon_emit(cs, reloc);
if (!rview->skip_mip_address_reloc) {
- radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+ radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
radeon_emit(cs, reloc);
}
}
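/* Editor's note: a self-contained sketch of what the new pkt_flags argument
 * accomplishes. OR-ing RADEON_CP_PACKET3_COMPUTE_MODE into a PKT3 header
 * asks the CP to apply the SET_RESOURCE/SET_SAMPLER write to the compute
 * shader state rather than the graphics state. The bit positions and the
 * SET_RESOURCE opcode below are illustrative, not copied from the headers. */
#include <assert.h>
#include <stdint.h>

#define PKT_TYPE_S(x)       ((uint32_t)(x) << 30)
#define PKT_COUNT_S(x)      ((uint32_t)(x) << 16)
#define PKT3_IT_OPCODE_S(x) ((uint32_t)(x) << 8)
#define PKT3(op, count)     (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count))
#define COMPUTE_MODE        0x2u  /* stands in for RADEON_CP_PACKET3_COMPUTE_MODE */
#define SET_RESOURCE        0x6du /* illustrative opcode value */

int main(void)
{
	uint32_t gfx = PKT3(SET_RESOURCE, 8);       /* pkt_flags == 0 */
	uint32_t cs  = PKT3(SET_RESOURCE, 8) | COMPUTE_MODE;

	assert((cs & ~COMPUTE_MODE) == gfx);        /* only the mode bit differs */
	return 0;
}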
@@ -2015,23 +2019,33 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx,
static void evergreen_emit_vs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
{
- evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views, 176 + R600_MAX_CONST_BUFFERS);
+ evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views,
+ 176 + R600_MAX_CONST_BUFFERS, 0);
}
static void evergreen_emit_gs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
{
- evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views, 336 + R600_MAX_CONST_BUFFERS);
+ evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views,
+ 336 + R600_MAX_CONST_BUFFERS, 0);
}
static void evergreen_emit_ps_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
{
- evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views, R600_MAX_CONST_BUFFERS);
+ evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views,
+ R600_MAX_CONST_BUFFERS, 0);
+}
+
+static void evergreen_emit_cs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
+{
+ evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].views,
+ 816 + 2, RADEON_CP_PACKET3_COMPUTE_MODE);
}
static void evergreen_emit_sampler_states(struct r600_context *rctx,
struct r600_textures_info *texinfo,
unsigned resource_id_base,
- unsigned border_index_reg)
+ unsigned border_index_reg,
+ unsigned pkt_flags)
{
struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
uint32_t dirty_mask = texinfo->states.dirty_mask;
@@ -2043,7 +2057,7 @@ static void evergreen_emit_sampler_states(struct r600_context *rctx,
rstate = texinfo->states.states[i];
assert(rstate);
- radeon_emit(cs, PKT3(PKT3_SET_SAMPLER, 3, 0));
+ radeon_emit(cs, PKT3(PKT3_SET_SAMPLER, 3, 0) | pkt_flags);
radeon_emit(cs, (resource_id_base + i) * 3);
radeon_emit_array(cs, rstate->tex_sampler_words, 3);
@@ -2058,17 +2072,27 @@ static void evergreen_emit_sampler_states(struct r600_context *rctx,
static void evergreen_emit_vs_sampler_states(struct r600_context *rctx, struct r600_atom *atom)
{
- evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_VERTEX], 18, R_00A414_TD_VS_SAMPLER0_BORDER_INDEX);
+ evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_VERTEX], 18,
+ R_00A414_TD_VS_SAMPLER0_BORDER_INDEX, 0);
}
static void evergreen_emit_gs_sampler_states(struct r600_context *rctx, struct r600_atom *atom)
{
- evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY], 36, R_00A428_TD_GS_SAMPLER0_BORDER_INDEX);
+ evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY], 36,
+ R_00A428_TD_GS_SAMPLER0_BORDER_INDEX, 0);
}
static void evergreen_emit_ps_sampler_states(struct r600_context *rctx, struct r600_atom *atom)
{
- evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT], 0, R_00A400_TD_PS_SAMPLER0_BORDER_INDEX);
+ evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT], 0,
+ R_00A400_TD_PS_SAMPLER0_BORDER_INDEX, 0);
+}
+
+static void evergreen_emit_cs_sampler_states(struct r600_context *rctx, struct r600_atom *atom)
+{
+ evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE], 90,
+ R_00A464_TD_CS_SAMPLER0_BORDER_INDEX,
+ RADEON_CP_PACKET3_COMPUTE_MODE);
}
static void evergreen_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a)
@@ -3176,7 +3200,7 @@ void evergreen_update_db_shader_control(struct r600_context * rctx)
if (db_shader_control != rctx->db_misc_state.db_shader_control) {
rctx->db_misc_state.db_shader_control = db_shader_control;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
}
@@ -3431,12 +3455,14 @@ void evergreen_init_state_functions(struct r600_context *rctx)
r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].states.atom, id++, evergreen_emit_vs_sampler_states, 0);
r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].states.atom, id++, evergreen_emit_gs_sampler_states, 0);
r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].states.atom, id++, evergreen_emit_ps_sampler_states, 0);
+ r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].states.atom, id++, evergreen_emit_cs_sampler_states, 0);
/* resources */
r600_init_atom(rctx, &rctx->vertex_buffer_state.atom, id++, evergreen_fs_emit_vertex_buffers, 0);
r600_init_atom(rctx, &rctx->cs_vertex_buffer_state.atom, id++, evergreen_cs_emit_vertex_buffers, 0);
r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views.atom, id++, evergreen_emit_vs_sampler_views, 0);
r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views.atom, id++, evergreen_emit_gs_sampler_views, 0);
r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views.atom, id++, evergreen_emit_ps_sampler_views, 0);
+ r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].views.atom, id++, evergreen_emit_cs_sampler_views, 0);
r600_init_atom(rctx, &rctx->vgt_state.atom, id++, r600_emit_vgt_state, 10);
@@ -3466,8 +3492,8 @@ void evergreen_init_state_functions(struct r600_context *rctx)
}
r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4);
r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, evergreen_emit_vertex_fetch_shader, 5);
- rctx->atoms[id++] = &rctx->b.streamout.begin_atom;
- rctx->atoms[id++] = &rctx->b.streamout.enable_atom;
+ r600_add_atom(rctx, &rctx->b.streamout.begin_atom, id++);
+ r600_add_atom(rctx, &rctx->b.streamout.enable_atom, id++);
r600_init_atom(rctx, &rctx->vertex_shader.atom, id++, r600_emit_shader, 23);
r600_init_atom(rctx, &rctx->pixel_shader.atom, id++, r600_emit_shader, 0);
r600_init_atom(rctx, &rctx->geometry_shader.atom, id++, r600_emit_shader, 0);
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index cd4ff46b103..ad6ad434b78 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -1253,6 +1253,11 @@
#define R_00A430_TD_GS_SAMPLER0_BORDER_GREEN 0x00A430
#define R_00A434_TD_GS_SAMPLER0_BORDER_BLUE 0x00A434
#define R_00A438_TD_GS_SAMPLER0_BORDER_ALPHA 0x00A438
+#define R_00A464_TD_CS_SAMPLER0_BORDER_INDEX 0x00A464
+#define R_00A468_TD_CS_SAMPLER0_BORDER_RED 0x00A468
+#define R_00A46C_TD_CS_SAMPLER0_BORDER_GREEN 0x00A46C
+#define R_00A470_TD_CS_SAMPLER0_BORDER_BLUE 0x00A470
+#define R_00A474_TD_CS_SAMPLER0_BORDER_ALPHA 0x00A474
#define R_03C000_SQ_TEX_SAMPLER_WORD0_0 0x03C000
#define S_03C000_CLAMP_X(x) (((x) & 0x7) << 0)
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 01262a59e90..b0002c3b50f 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -145,7 +145,7 @@ static void r600_blit_decompress_depth(struct pipe_context *ctx,
rctx->db_misc_state.copy_depth = util_format_has_depth(desc);
rctx->db_misc_state.copy_stencil = util_format_has_stencil(desc);
rctx->db_misc_state.copy_sample = first_sample;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
for (level = first_level; level <= last_level; level++) {
if (!staging && !(texture->dirty_level_mask & (1 << level)))
@@ -162,7 +162,7 @@ static void r600_blit_decompress_depth(struct pipe_context *ctx,
if (sample != rctx->db_misc_state.copy_sample) {
rctx->db_misc_state.copy_sample = sample;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
surf_tmpl.format = texture->resource.b.b.format;
@@ -197,7 +197,7 @@ static void r600_blit_decompress_depth(struct pipe_context *ctx,
/* reenable compression in DB_RENDER_CONTROL */
rctx->db_misc_state.flush_depthstencil_through_cb = false;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
static void r600_blit_decompress_depth_in_place(struct r600_context *rctx,
@@ -210,7 +210,7 @@ static void r600_blit_decompress_depth_in_place(struct r600_context *rctx,
/* Enable decompression in DB_RENDER_CONTROL */
rctx->db_misc_state.flush_depthstencil_in_place = true;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
surf_tmpl.format = texture->resource.b.b.format;
@@ -248,7 +248,7 @@ static void r600_blit_decompress_depth_in_place(struct r600_context *rctx,
/* Disable decompression in DB_RENDER_CONTROL */
rctx->db_misc_state.flush_depthstencil_in_place = false;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
void r600_decompress_depth_textures(struct r600_context *rctx,
@@ -396,6 +396,8 @@ static void r600_clear(struct pipe_context *ctx, unsigned buffers,
if (buffers & PIPE_CLEAR_COLOR && rctx->b.chip_class >= EVERGREEN) {
evergreen_do_fast_color_clear(&rctx->b, fb, &rctx->framebuffer.atom,
&buffers, color);
+ if (!buffers)
+ return; /* all buffers have been fast cleared */
}
if (buffers & PIPE_CLEAR_COLOR) {
@@ -435,10 +437,10 @@ static void r600_clear(struct pipe_context *ctx, unsigned buffers,
fb->zsbuf->u.tex.last_layer == util_max_layer(&rtex->resource.b.b, level)) {
if (rtex->depth_clear_value != depth) {
rtex->depth_clear_value = depth;
- rctx->db_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_state.atom);
}
rctx->db_misc_state.htile_clear = true;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
}
@@ -451,7 +453,7 @@ static void r600_clear(struct pipe_context *ctx, unsigned buffers,
/* disable fast clear */
if (rctx->db_misc_state.htile_clear) {
rctx->db_misc_state.htile_clear = false;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
}
diff --git a/src/gallium/drivers/r600/r600_formats.h b/src/gallium/drivers/r600/r600_formats.h
index fa374d92e6f..9533aaa1378 100644
--- a/src/gallium/drivers/r600/r600_formats.h
+++ b/src/gallium/drivers/r600/r600_formats.h
@@ -64,7 +64,7 @@
#define ENDIAN_8IN32 2
#define ENDIAN_8IN64 3
-static INLINE unsigned r600_endian_swap(unsigned size)
+static inline unsigned r600_endian_swap(unsigned size)
{
if (R600_BIG_ENDIAN) {
switch (size) {
@@ -82,7 +82,7 @@ static INLINE unsigned r600_endian_swap(unsigned size)
}
}
-static INLINE bool r600_is_vertex_format_supported(enum pipe_format format)
+static inline bool r600_is_vertex_format_supported(enum pipe_format format)
{
const struct util_format_description *desc = util_format_description(format);
unsigned i;
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 8eb0c6806b9..64451516c23 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -51,13 +51,13 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
unsigned i;
/* The number of dwords all the dirty states would take. */
- for (i = 0; i < R600_NUM_ATOMS; i++) {
- if (ctx->atoms[i] && ctx->atoms[i]->dirty) {
- num_dw += ctx->atoms[i]->num_dw;
- if (ctx->screen->b.trace_bo) {
- num_dw += R600_TRACE_CS_DWORDS;
- }
+ i = r600_next_dirty_atom(ctx, 0);
+ while (i < R600_NUM_ATOMS) {
+ num_dw += ctx->atoms[i]->num_dw;
+ if (ctx->screen->b.trace_bo) {
+ num_dw += R600_TRACE_CS_DWORDS;
}
+ i = r600_next_dirty_atom(ctx, i + 1);
}
/* The upper-bound of how much space a draw command would take. */
@@ -68,7 +68,8 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
}
/* Count in queries_suspend. */
- num_dw += ctx->b.num_cs_dw_nontimer_queries_suspend;
+ num_dw += ctx->b.num_cs_dw_nontimer_queries_suspend +
+ ctx->b.num_cs_dw_timer_queries_suspend;
/* Count in streamout_end at the end of CS. */
if (ctx->b.streamout.begin_emitted) {
@@ -92,7 +93,7 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
num_dw += 10;
/* Flush if there's not enough space. */
- if (num_dw > RADEON_MAX_CMDBUF_DWORDS) {
+ if (num_dw > ctx->b.rings.gfx.cs->max_dw) {
ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
}
}
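/* Editor's note: a self-contained model of the dirty-atom scan used above.
 * It mirrors r600_next_dirty_atom, added later in this diff in r600_pipe.h:
 * dirty ids live in an unsigned long bitmask array and __builtin_ctzl jumps
 * straight to the next set bit instead of probing every atom slot. Sizes
 * match the patch; the test ids are illustrative. */
#include <assert.h>

#define NUM_ATOMS 75
#define WORD_BITS (sizeof(unsigned long) * 8)
#define ARRAY_LEN ((NUM_ATOMS + WORD_BITS - 1) / WORD_BITS)

static unsigned next_dirty(const unsigned long *words, unsigned id)
{
	unsigned w = id / WORD_BITS;
	unsigned long mask = (1ul << (id % WORD_BITS)) - 1; /* bits below id */

	for (; w < ARRAY_LEN; w++, mask = 0ul) {
		unsigned long bits = words[w] & ~mask;
		if (bits)
			return w * WORD_BITS + __builtin_ctzl(bits);
	}
	return NUM_ATOMS;
}

int main(void)
{
	unsigned long dirty[ARRAY_LEN] = {0};

	dirty[3 / WORD_BITS]  |= 1ul << (3 % WORD_BITS);
	dirty[70 / WORD_BITS] |= 1ul << (70 % WORD_BITS);

	assert(next_dirty(dirty, 0) == 3);
	assert(next_dirty(dirty, 4) == 70);
	assert(next_dirty(dirty, 71) == NUM_ATOMS);
	return 0;
}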
@@ -295,43 +296,45 @@ void r600_begin_new_cs(struct r600_context *ctx)
r600_emit_command_buffer(ctx->b.rings.gfx.cs, &ctx->start_cs_cmd);
/* Re-emit states. */
- ctx->alphatest_state.atom.dirty = true;
- ctx->blend_color.atom.dirty = true;
- ctx->cb_misc_state.atom.dirty = true;
- ctx->clip_misc_state.atom.dirty = true;
- ctx->clip_state.atom.dirty = true;
- ctx->db_misc_state.atom.dirty = true;
- ctx->db_state.atom.dirty = true;
- ctx->framebuffer.atom.dirty = true;
- ctx->pixel_shader.atom.dirty = true;
- ctx->poly_offset_state.atom.dirty = true;
- ctx->vgt_state.atom.dirty = true;
- ctx->sample_mask.atom.dirty = true;
+ r600_mark_atom_dirty(ctx, &ctx->alphatest_state.atom);
+ r600_mark_atom_dirty(ctx, &ctx->blend_color.atom);
+ r600_mark_atom_dirty(ctx, &ctx->cb_misc_state.atom);
+ r600_mark_atom_dirty(ctx, &ctx->clip_misc_state.atom);
+ r600_mark_atom_dirty(ctx, &ctx->clip_state.atom);
+ r600_mark_atom_dirty(ctx, &ctx->db_misc_state.atom);
+ r600_mark_atom_dirty(ctx, &ctx->db_state.atom);
+ r600_mark_atom_dirty(ctx, &ctx->framebuffer.atom);
+ r600_mark_atom_dirty(ctx, &ctx->pixel_shader.atom);
+ r600_mark_atom_dirty(ctx, &ctx->poly_offset_state.atom);
+ r600_mark_atom_dirty(ctx, &ctx->vgt_state.atom);
+ r600_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
for (i = 0; i < R600_MAX_VIEWPORTS; i++) {
- ctx->scissor[i].atom.dirty = true;
- ctx->viewport[i].atom.dirty = true;
- }
- ctx->config_state.atom.dirty = true;
- ctx->stencil_ref.atom.dirty = true;
- ctx->vertex_fetch_shader.atom.dirty = true;
- ctx->export_shader.atom.dirty = true;
- ctx->shader_stages.atom.dirty = true;
+ r600_mark_atom_dirty(ctx, &ctx->scissor[i].atom);
+ r600_mark_atom_dirty(ctx, &ctx->viewport[i].atom);
+ }
+ if (ctx->b.chip_class < EVERGREEN) {
+ r600_mark_atom_dirty(ctx, &ctx->config_state.atom);
+ }
+ r600_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
+ r600_mark_atom_dirty(ctx, &ctx->vertex_fetch_shader.atom);
+ r600_mark_atom_dirty(ctx, &ctx->export_shader.atom);
+ r600_mark_atom_dirty(ctx, &ctx->shader_stages.atom);
if (ctx->gs_shader) {
- ctx->geometry_shader.atom.dirty = true;
- ctx->gs_rings.atom.dirty = true;
+ r600_mark_atom_dirty(ctx, &ctx->geometry_shader.atom);
+ r600_mark_atom_dirty(ctx, &ctx->gs_rings.atom);
}
- ctx->vertex_shader.atom.dirty = true;
- ctx->b.streamout.enable_atom.dirty = true;
+ r600_mark_atom_dirty(ctx, &ctx->vertex_shader.atom);
+ r600_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
if (ctx->blend_state.cso)
- ctx->blend_state.atom.dirty = true;
+ r600_mark_atom_dirty(ctx, &ctx->blend_state.atom);
if (ctx->dsa_state.cso)
- ctx->dsa_state.atom.dirty = true;
+ r600_mark_atom_dirty(ctx, &ctx->dsa_state.atom);
if (ctx->rasterizer_state.cso)
- ctx->rasterizer_state.atom.dirty = true;
+ r600_mark_atom_dirty(ctx, &ctx->rasterizer_state.atom);
if (ctx->b.chip_class <= R700) {
- ctx->seamless_cube_map.atom.dirty = true;
+ r600_mark_atom_dirty(ctx, &ctx->seamless_cube_map.atom);
}
ctx->vertex_buffer_state.dirty_mask = ctx->vertex_buffer_state.enabled_mask;
diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c
index 72e2dc42f7e..faf538ccbb5 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -84,7 +84,7 @@ static void llvm_load_system_value(
#else
LLVMValueRef reg = lp_build_const_int32(
ctx->soa.bld_base.base.gallivm, chan);
- ctx->system_values[index] = build_intrinsic(
+ ctx->system_values[index] = lp_build_intrinsic(
ctx->soa.bld_base.base.gallivm->builder,
"llvm.R600.load.input",
ctx->soa.bld_base.base.elem_type, &reg, 1,
@@ -111,9 +111,9 @@ llvm_load_input_vector(
Args[ArgCount++] = LLVMBuildExtractElement(ctx->gallivm.builder, IJIndex,
lp_build_const_int32(&(ctx->gallivm), 2 * (ijregs % 2) + 1), "");
LLVMValueRef HalfVec[2] = {
- build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.xy",
+ lp_build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.xy",
VecType, Args, ArgCount, LLVMReadNoneAttribute),
- build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.zw",
+ lp_build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.zw",
VecType, Args, ArgCount, LLVMReadNoneAttribute)
};
LLVMValueRef MaskInputs[4] = {
@@ -127,7 +127,7 @@ llvm_load_input_vector(
Mask, "");
} else {
VecType = LLVMVectorType(ctx->soa.bld_base.base.elem_type, 4);
- return build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.const",
+ return lp_build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.const",
VecType, Args, ArgCount, LLVMReadNoneAttribute);
}
}
@@ -153,7 +153,7 @@ llvm_load_input_helper(
arg_count = 1;
}
- return build_intrinsic(bb->gallivm->builder, intrinsic,
+ return lp_build_intrinsic(bb->gallivm->builder, intrinsic,
bb->elem_type, &arg[0], arg_count, LLVMReadNoneAttribute);
}
#endif
@@ -332,7 +332,7 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
args[2] = lp_build_const_int32(base->gallivm, so->output[i].output_buffer);
args[3] = lp_build_const_int32(base->gallivm, ((1 << num_components) - 1) << start_component);
lp_build_intrinsic(base->gallivm->builder, "llvm.R600.store.stream.output",
- LLVMVoidTypeInContext(base->gallivm->context), args, 4);
+ LLVMVoidTypeInContext(base->gallivm->context), args, 4, 0);
}
}
@@ -356,7 +356,7 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
args[0] = output;
args[1] = lp_build_const_int32(base->gallivm, next_pos++);
args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS);
- build_intrinsic(
+ lp_build_intrinsic(
base->gallivm->builder,
"llvm.R600.store.swizzle",
LLVMVoidTypeInContext(base->gallivm->context),
@@ -373,7 +373,7 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
LLVMValueRef base_vector = llvm_load_const_buffer(bld_base, offset, CONSTANT_BUFFER_1_ADDR_SPACE);
args[0] = output;
args[1] = base_vector;
- adjusted_elements[chan] = build_intrinsic(base->gallivm->builder,
+ adjusted_elements[chan] = lp_build_intrinsic(base->gallivm->builder,
"llvm.AMDGPU.dp4", bld_base->base.elem_type,
args, 2, LLVMReadNoneAttribute);
}
@@ -381,7 +381,7 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
adjusted_elements, 4);
args[1] = lp_build_const_int32(base->gallivm, next_pos++);
args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS);
- build_intrinsic(
+ lp_build_intrinsic(
base->gallivm->builder,
"llvm.R600.store.swizzle",
LLVMVoidTypeInContext(base->gallivm->context),
@@ -394,14 +394,14 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
args[0] = output;
args[1] = lp_build_const_int32(base->gallivm, next_pos++);
args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS);
- build_intrinsic(
+ lp_build_intrinsic(
base->gallivm->builder,
"llvm.R600.store.swizzle",
LLVMVoidTypeInContext(base->gallivm->context),
args, 3, 0);
args[1] = lp_build_const_int32(base->gallivm, next_param++);
args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM);
- build_intrinsic(
+ lp_build_intrinsic(
base->gallivm->builder,
"llvm.R600.store.swizzle",
LLVMVoidTypeInContext(base->gallivm->context),
@@ -418,7 +418,7 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
args[0] = lp_build_gather_values(base->gallivm, elements, 4);
args[1] = lp_build_const_int32(base->gallivm, next_param++);
args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM);
- build_intrinsic(
+ lp_build_intrinsic(
base->gallivm->builder,
"llvm.R600.store.swizzle",
LLVMVoidTypeInContext(base->gallivm->context),
@@ -430,7 +430,7 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
args[0] = output;
args[1] = lp_build_const_int32(base->gallivm, next_param++);
args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM);
- build_intrinsic(
+ lp_build_intrinsic(
base->gallivm->builder,
"llvm.R600.store.swizzle",
LLVMVoidTypeInContext(base->gallivm->context),
@@ -449,7 +449,7 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
for (unsigned j = 0; j < ctx->color_buffer_count; j++) {
args[1] = lp_build_const_int32(base->gallivm, j);
args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL);
- build_intrinsic(
+ lp_build_intrinsic(
base->gallivm->builder,
"llvm.R600.store.swizzle",
LLVMVoidTypeInContext(base->gallivm->context),
@@ -458,7 +458,7 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
} else {
args[1] = lp_build_const_int32(base->gallivm, color_count++);
args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL);
- build_intrinsic(
+ lp_build_intrinsic(
base->gallivm->builder,
"llvm.R600.store.swizzle",
LLVMVoidTypeInContext(base->gallivm->context),
@@ -543,7 +543,7 @@ static void llvm_emit_tex(
case TGSI_OPCODE_TXF: {
args[0] = LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 0), "");
args[1] = lp_build_const_int32(gallivm, R600_MAX_CONST_BUFFERS);
- emit_data->output[0] = build_intrinsic(gallivm->builder,
+ emit_data->output[0] = lp_build_intrinsic(gallivm->builder,
"llvm.R600.load.texbuf",
emit_data->dst_type, args, 2, LLVMReadNoneAttribute);
if (ctx->chip_class >= EVERGREEN)
@@ -658,7 +658,7 @@ static void llvm_emit_tex(
lp_build_const_int32(gallivm, 1),
lp_build_const_int32(gallivm, 1)
};
- LLVMValueRef ptr = build_intrinsic(gallivm->builder,
+ LLVMValueRef ptr = lp_build_intrinsic(gallivm->builder,
"llvm.R600.ldptr",
emit_data->dst_type, ldptr_args, 10, LLVMReadNoneAttribute);
LLVMValueRef Tmp = LLVMBuildExtractElement(gallivm->builder, args[0],
@@ -679,7 +679,7 @@ static void llvm_emit_tex(
}
}
- emit_data->output[0] = build_intrinsic(gallivm->builder,
+ emit_data->output[0] = lp_build_intrinsic(gallivm->builder,
action->intr_name,
emit_data->dst_type, args, c, LLVMReadNoneAttribute);
@@ -754,7 +754,131 @@ static struct lp_build_tgsi_action dot_action = {
.intr_name = "llvm.AMDGPU.dp4"
};
+static void txd_fetch_args(
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ const struct tgsi_full_instruction * inst = emit_data->inst;
+
+ LLVMValueRef coords[4];
+ unsigned chan, src;
+ for (src = 0; src < 3; src++) {
+ for (chan = 0; chan < 4; chan++)
+ coords[chan] = lp_build_emit_fetch(bld_base, inst, src, chan);
+
+ emit_data->args[src] = lp_build_gather_values(bld_base->base.gallivm,
+ coords, 4);
+ }
+ emit_data->arg_count = 3;
+ emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
+}
+
+
+static void txp_fetch_args(
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ const struct tgsi_full_instruction * inst = emit_data->inst;
+ LLVMValueRef src_w;
+ unsigned chan;
+ LLVMValueRef coords[5];
+
+ emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
+ src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
+
+ for (chan = 0; chan < 3; chan++) {
+ LLVMValueRef arg = lp_build_emit_fetch(bld_base,
+ emit_data->inst, 0, chan);
+ coords[chan] = lp_build_emit_llvm_binary(bld_base,
+ TGSI_OPCODE_DIV, arg, src_w);
+ }
+ coords[3] = bld_base->base.one;
+
+ if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
+ inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
+ inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
+ inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
+ inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
+ inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
+ radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords, NULL);
+ }
+ emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
+ coords, 4);
+ emit_data->arg_count = 1;
+}
+
+static void tex_fetch_args(
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ const struct tgsi_full_instruction * inst = emit_data->inst;
+
+ LLVMValueRef coords[5];
+ unsigned chan;
+ for (chan = 0; chan < 4; chan++) {
+ coords[chan] = lp_build_emit_fetch(bld_base, inst, 0, chan);
+ }
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 ||
+ inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
+ inst->Instruction.Opcode == TGSI_OPCODE_TXL2) {
+ /* These instructions have an additional operand that must be packed
+ * into the cube coord vector by radeon_llvm_emit_prepare_cube_coords.
+ * That operand is passed as a float value in the args array
+ * right after the coord vector. After packing it is no longer used,
+ * which is why arg_count is not increased. */
+ coords[4] = lp_build_emit_fetch(bld_base, inst, 1, 0);
+ }
+
+ if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
+ inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
+ inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
+ inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
+ inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
+ inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
+ radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords, NULL);
+ }
+
+ emit_data->arg_count = 1;
+ emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
+ coords, 4);
+ emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
+}
+
+static void txf_fetch_args(
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ const struct tgsi_full_instruction * inst = emit_data->inst;
+ struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+ const struct tgsi_texture_offset * off = inst->TexOffsets;
+ LLVMTypeRef offset_type = bld_base->int_bld.elem_type;
+
+ /* fetch tex coords */
+ tex_fetch_args(bld_base, emit_data);
+
+ /* fetch tex offsets */
+ if (inst->Texture.NumOffsets) {
+ assert(inst->Texture.NumOffsets == 1);
+
+ emit_data->args[1] = LLVMConstBitCast(
+ bld->immediates[off->Index][off->SwizzleX],
+ offset_type);
+ emit_data->args[2] = LLVMConstBitCast(
+ bld->immediates[off->Index][off->SwizzleY],
+ offset_type);
+ emit_data->args[3] = LLVMConstBitCast(
+ bld->immediates[off->Index][off->SwizzleZ],
+ offset_type);
+ } else {
+ emit_data->args[1] = bld_base->int_bld.zero;
+ emit_data->args[2] = bld_base->int_bld.zero;
+ emit_data->args[3] = bld_base->int_bld.zero;
+ }
+
+ emit_data->arg_count = 4;
+}
LLVMModuleRef r600_tgsi_llvm(
struct radeon_llvm_context * ctx,
@@ -783,7 +907,6 @@ LLVMModuleRef r600_tgsi_llvm(
bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = llvm_fetch_const;
bld_base->emit_prologue = llvm_emit_prologue;
bld_base->emit_epilogue = llvm_emit_epilogue;
- ctx->userdata = ctx;
ctx->load_input = llvm_load_input;
ctx->load_system_value = llvm_load_system_value;
@@ -791,18 +914,42 @@ LLVMModuleRef r600_tgsi_llvm(
bld_base->op_actions[TGSI_OPCODE_DP3] = dot_action;
bld_base->op_actions[TGSI_OPCODE_DP4] = dot_action;
bld_base->op_actions[TGSI_OPCODE_DPH] = dot_action;
+ bld_base->op_actions[TGSI_OPCODE_DDX].intr_name = "llvm.AMDGPU.ddx";
+ bld_base->op_actions[TGSI_OPCODE_DDX].fetch_args = tex_fetch_args;
bld_base->op_actions[TGSI_OPCODE_DDX].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_DDY].intr_name = "llvm.AMDGPU.ddy";
+ bld_base->op_actions[TGSI_OPCODE_DDY].fetch_args = tex_fetch_args;
bld_base->op_actions[TGSI_OPCODE_DDY].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TEX].fetch_args = tex_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TEX].intr_name = "llvm.AMDGPU.tex";
bld_base->op_actions[TGSI_OPCODE_TEX].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TEX2].fetch_args = tex_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TEX2].intr_name = "llvm.AMDGPU.tex";
bld_base->op_actions[TGSI_OPCODE_TEX2].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TXB].fetch_args = tex_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TXB].intr_name = "llvm.AMDGPU.txb";
bld_base->op_actions[TGSI_OPCODE_TXB].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TXB2].fetch_args = tex_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TXB2].intr_name = "llvm.AMDGPU.txb";
bld_base->op_actions[TGSI_OPCODE_TXB2].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TXD].fetch_args = txd_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TXD].intr_name = "llvm.AMDGPU.txd";
bld_base->op_actions[TGSI_OPCODE_TXD].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TXF].fetch_args = txf_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TXF].intr_name = "llvm.AMDGPU.txf";
+ bld_base->op_actions[TGSI_OPCODE_TXF].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TXL].fetch_args = tex_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TXL].intr_name = "llvm.AMDGPU.txl";
bld_base->op_actions[TGSI_OPCODE_TXL].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TXL2].fetch_args = tex_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TXL2].intr_name = "llvm.AMDGPU.txl";
bld_base->op_actions[TGSI_OPCODE_TXL2].emit = llvm_emit_tex;
- bld_base->op_actions[TGSI_OPCODE_TXF].emit = llvm_emit_tex;
- bld_base->op_actions[TGSI_OPCODE_TXQ].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TXP].fetch_args = txp_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TXP].intr_name = "llvm.AMDGPU.tex";
bld_base->op_actions[TGSI_OPCODE_TXP].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = tex_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TXQ].intr_name = "llvm.AMDGPU.txq";
+ bld_base->op_actions[TGSI_OPCODE_TXQ].emit = llvm_emit_tex;
bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cndlt;
lp_build_tgsi_llvm(bld_base, tokens);
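/* Editor's note: a self-contained model of the op_actions wiring above. Each
 * TGSI opcode row carries an argument-fetch callback plus an LLVM intrinsic
 * name, and a shared emit routine dispatches on the row. The struct and
 * function names here are invented for illustration; the real types are
 * lp_build_tgsi_action and friends. Note that TXP maps to the plain
 * llvm.AMDGPU.tex intrinsic, as in the hunk above. */
#include <assert.h>
#include <stdio.h>
#include <string.h>

enum op { OP_TEX, OP_TXD, OP_TXP, OP_COUNT };

struct action {
	const char *intr_name;
	void (*fetch_args)(int op);
};

static void tex_fetch(int op) { printf("fetch coords for op %d\n", op); }
static void txd_fetch(int op) { printf("fetch coords + derivatives for op %d\n", op); }
static void txp_fetch(int op) { printf("fetch projected coords for op %d\n", op); }

int main(void)
{
	struct action actions[OP_COUNT];

	actions[OP_TEX] = (struct action){ "llvm.AMDGPU.tex", tex_fetch };
	actions[OP_TXD] = (struct action){ "llvm.AMDGPU.txd", txd_fetch };
	actions[OP_TXP] = (struct action){ "llvm.AMDGPU.tex", txp_fetch };

	for (int op = 0; op < OP_COUNT; op++) {
		actions[op].fetch_args(op);   /* per-opcode operand packing */
		assert(strncmp(actions[op].intr_name, "llvm.", 5) == 0);
	}
	return 0;
}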
@@ -881,7 +1028,7 @@ unsigned r600_llvm_compile(
const char * gpu_family = r600_get_llvm_processor_name(family);
memset(&binary, 0, sizeof(struct radeon_shader_binary));
- r = radeon_llvm_compile(mod, &binary, gpu_family, dump, NULL);
+ r = radeon_llvm_compile(mod, &binary, gpu_family, dump, dump, NULL);
r = r600_create_shader(bc, &binary, use_kill);
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index e122b607b86..6ffe5615fbf 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -120,6 +120,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
rctx->b.b.screen = screen;
rctx->b.b.priv = priv;
rctx->b.b.destroy = r600_destroy_context;
+ rctx->b.set_atom_dirty = (void *)r600_set_atom_dirty;
if (!r600_common_context_init(&rctx->b, &rscreen->b))
goto fail;
@@ -176,7 +177,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
goto fail;
}
- rctx->b.rings.gfx.cs = ws->cs_create(ws, RING_GFX,
+ rctx->b.rings.gfx.cs = ws->cs_create(rctx->b.ctx, RING_GFX,
r600_context_gfx_flush, rctx,
rscreen->b.trace_bo ?
rscreen->b.trace_bo->cs_buf : NULL);
@@ -268,8 +269,14 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_SAMPLE_SHADING:
case PIPE_CAP_CLIP_HALFZ:
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
+ case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
return 1;
+ case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ return rscreen->b.info.drm_major == 2 && rscreen->b.info.drm_minor >= 43;
+
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
return !R600_BIG_ENDIAN && rscreen->b.info.has_userptr;
@@ -329,10 +336,10 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_VERTEX_COLOR_CLAMPED:
case PIPE_CAP_USER_VERTEX_BUFFERS:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
- case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
case PIPE_CAP_SAMPLER_VIEW_TARGET:
case PIPE_CAP_VERTEXID_NOBASE:
- case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
return 0;
/* Stream output. */
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 4ea270d3839..9b66105641a 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -36,7 +36,7 @@
#include "util/list.h"
#include "util/u_transfer.h"
-#define R600_NUM_ATOMS 73
+#define R600_NUM_ATOMS 75
#define R600_MAX_VIEWPORTS 16
@@ -85,6 +85,9 @@
#define R600_BIG_ENDIAN 0
#endif
+#define R600_DIRTY_ATOM_WORD_BITS (sizeof(unsigned long) * 8)
+#define R600_DIRTY_ATOM_ARRAY_LEN DIV_ROUND_UP(R600_NUM_ATOMS, R600_DIRTY_ATOM_WORD_BITS)
+
struct r600_context;
struct r600_bytecode;
struct r600_shader_key;
@@ -426,6 +429,8 @@ struct r600_context {
/* State binding slots are here. */
struct r600_atom *atoms[R600_NUM_ATOMS];
+ /* Dirty atom bitmask for fast tests */
+ unsigned long dirty_atoms[R600_DIRTY_ATOM_ARRAY_LEN];
/* States for CS initialization. */
struct r600_command_buffer start_cs_cmd; /* invariant state mostly */
/** Compute-specific register initializations. The start_cs_cmd atom
@@ -490,37 +495,92 @@ struct r600_context {
struct r600_isa *isa;
};
-static INLINE void r600_emit_command_buffer(struct radeon_winsys_cs *cs,
+static inline void r600_emit_command_buffer(struct radeon_winsys_cs *cs,
struct r600_command_buffer *cb)
{
- assert(cs->cdw + cb->num_dw <= RADEON_MAX_CMDBUF_DWORDS);
+ assert(cs->cdw + cb->num_dw <= cs->max_dw);
memcpy(cs->buf + cs->cdw, cb->buf, 4 * cb->num_dw);
cs->cdw += cb->num_dw;
}
+static inline void r600_set_atom_dirty(struct r600_context *rctx,
+ struct r600_atom *atom,
+ bool dirty)
+{
+ unsigned long mask;
+ unsigned int w;
+
+ atom->dirty = dirty;
+
+ assert(atom->id != 0);
+ w = atom->id / R600_DIRTY_ATOM_WORD_BITS;
+ mask = 1ul << (atom->id % R600_DIRTY_ATOM_WORD_BITS);
+ if (dirty)
+ rctx->dirty_atoms[w] |= mask;
+ else
+ rctx->dirty_atoms[w] &= ~mask;
+}
+
+static inline void r600_mark_atom_dirty(struct r600_context *rctx,
+ struct r600_atom *atom)
+{
+ r600_set_atom_dirty(rctx, atom, true);
+}
+
+static inline unsigned int r600_next_dirty_atom(struct r600_context *rctx,
+ unsigned int id)
+{
+#if !defined(DEBUG) && defined(HAVE___BUILTIN_CTZ)
+ unsigned int w = id / R600_DIRTY_ATOM_WORD_BITS;
+ unsigned int bit = id % R600_DIRTY_ATOM_WORD_BITS;
+ unsigned long bits, mask = (1ul << bit) - 1;
+
+ for (; w < R600_DIRTY_ATOM_ARRAY_LEN; w++, mask = 0ul) {
+ bits = rctx->dirty_atoms[w] & ~mask;
+ if (bits == 0)
+ continue;
+ return w * R600_DIRTY_ATOM_WORD_BITS + __builtin_ctzl(bits);
+ }
+
+ return R600_NUM_ATOMS;
+#else
+ for (; id < R600_NUM_ATOMS; id++) {
+ bool dirty = !!(rctx->dirty_atoms[id / R600_DIRTY_ATOM_WORD_BITS] &
+ (1ul << (id % R600_DIRTY_ATOM_WORD_BITS)));
+ assert(dirty == (rctx->atoms[id] && rctx->atoms[id]->dirty));
+ if (dirty)
+ break;
+ }
+
+ return id;
+#endif
+}
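/* Editor's note: a portable standalone check of the word/bit addressing used
 * by r600_set_atom_dirty above; it complements the scan model shown earlier
 * next to r600_need_cs_space. The real code reserves atom id 0 (hence the
 * assert on atom->id); the id used here is illustrative. */
#include <assert.h>

#define WORD_BITS (sizeof(unsigned long) * 8)

static void set_dirty(unsigned long *words, unsigned id, int dirty)
{
	unsigned long mask = 1ul << (id % WORD_BITS);

	if (dirty)
		words[id / WORD_BITS] |= mask;
	else
		words[id / WORD_BITS] &= ~mask;
}

int main(void)
{
	unsigned long words[4] = {0};
	unsigned id = WORD_BITS + 1;   /* second word, bit 1, on any platform */

	set_dirty(words, id, 1);
	assert(words[1] == 2ul);
	set_dirty(words, id, 0);
	assert(words[1] == 0ul);
	return 0;
}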
+
void r600_trace_emit(struct r600_context *rctx);
-static INLINE void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)
+static inline void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)
{
atom->emit(&rctx->b, atom);
- atom->dirty = false;
+ r600_set_atom_dirty(rctx, atom, false);
if (rctx->screen->b.trace_bo) {
r600_trace_emit(rctx);
}
}
-static INLINE void r600_set_cso_state(struct r600_cso_state *state, void *cso)
+static inline void r600_set_cso_state(struct r600_context *rctx,
+ struct r600_cso_state *state, void *cso)
{
state->cso = cso;
- state->atom.dirty = cso != NULL;
+ r600_set_atom_dirty(rctx, &state->atom, cso != NULL);
}
-static INLINE void r600_set_cso_state_with_cb(struct r600_cso_state *state, void *cso,
+static inline void r600_set_cso_state_with_cb(struct r600_context *rctx,
+ struct r600_cso_state *state, void *cso,
struct r600_command_buffer *cb)
{
state->cb = cb;
state->atom.num_dw = cb ? cb->num_dw : 0;
- r600_set_cso_state(state, cso);
+ r600_set_cso_state(rctx, state, cso);
}
/* compute_memory_pool.c */
@@ -529,11 +589,6 @@ void compute_memory_pool_delete(struct compute_memory_pool* pool);
struct compute_memory_pool* compute_memory_pool_new(
struct r600_screen *rscreen);
-/* evergreen_compute.c */
-void evergreen_set_cs_sampler_view(struct pipe_context *ctx_,
- unsigned start_slot, unsigned count,
- struct pipe_sampler_view **views);
-
/* evergreen_state.c */
struct pipe_sampler_view *
evergreen_create_sampler_view_custom(struct pipe_context *ctx,
@@ -656,6 +711,7 @@ void r600_emit_clip_misc_state(struct r600_context *rctx, struct r600_atom *atom
void r600_emit_stencil_ref(struct r600_context *rctx, struct r600_atom *atom);
void r600_emit_viewport_state(struct r600_context *rctx, struct r600_atom *atom);
void r600_emit_shader(struct r600_context *rctx, struct r600_atom *a);
+void r600_add_atom(struct r600_context *rctx, struct r600_atom *atom, unsigned id);
void r600_init_atom(struct r600_context *rctx, struct r600_atom *atom, unsigned id,
void (*emit)(struct r600_context *ctx, struct r600_atom *state),
unsigned num_dw);
@@ -719,19 +775,19 @@ struct pipe_video_buffer *r600_video_buffer_create(struct pipe_context *pipe,
/* Evergreen Compute packet3 */
#define PKT3C(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PREDICATE(predicate) | RADEON_CP_PACKET3_COMPUTE_MODE)
-static INLINE void r600_store_value(struct r600_command_buffer *cb, unsigned value)
+static inline void r600_store_value(struct r600_command_buffer *cb, unsigned value)
{
cb->buf[cb->num_dw++] = value;
}
-static INLINE void r600_store_array(struct r600_command_buffer *cb, unsigned num, unsigned *ptr)
+static inline void r600_store_array(struct r600_command_buffer *cb, unsigned num, unsigned *ptr)
{
assert(cb->num_dw+num <= cb->max_num_dw);
memcpy(&cb->buf[cb->num_dw], ptr, num * sizeof(ptr[0]));
cb->num_dw += num;
}
-static INLINE void r600_store_config_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
+static inline void r600_store_config_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
{
assert(reg < R600_CONTEXT_REG_OFFSET);
assert(cb->num_dw+2+num <= cb->max_num_dw);
@@ -743,7 +799,7 @@ static INLINE void r600_store_config_reg_seq(struct r600_command_buffer *cb, uns
* Needs cb->pkt_flags set to RADEON_CP_PACKET3_COMPUTE_MODE for compute
* shaders.
*/
-static INLINE void r600_store_context_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
+static inline void r600_store_context_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
{
assert(reg >= R600_CONTEXT_REG_OFFSET && reg < R600_CTL_CONST_OFFSET);
assert(cb->num_dw+2+num <= cb->max_num_dw);
@@ -755,7 +811,7 @@ static INLINE void r600_store_context_reg_seq(struct r600_command_buffer *cb, un
* Needs cb->pkt_flags set to RADEON_CP_PACKET3_COMPUTE_MODE for compute
* shaders.
*/
-static INLINE void r600_store_ctl_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
+static inline void r600_store_ctl_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
{
assert(reg >= R600_CTL_CONST_OFFSET);
assert(cb->num_dw+2+num <= cb->max_num_dw);
@@ -763,7 +819,7 @@ static INLINE void r600_store_ctl_const_seq(struct r600_command_buffer *cb, unsi
cb->buf[cb->num_dw++] = (reg - R600_CTL_CONST_OFFSET) >> 2;
}
-static INLINE void r600_store_loop_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
+static inline void r600_store_loop_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
{
assert(reg >= R600_LOOP_CONST_OFFSET);
assert(cb->num_dw+2+num <= cb->max_num_dw);
@@ -775,7 +831,7 @@ static INLINE void r600_store_loop_const_seq(struct r600_command_buffer *cb, uns
* Needs cb->pkt_flags set to RADEON_CP_PACKET3_COMPUTE_MODE for compute
* shaders.
*/
-static INLINE void eg_store_loop_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
+static inline void eg_store_loop_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
{
assert(reg >= EG_LOOP_CONST_OFFSET);
assert(cb->num_dw+2+num <= cb->max_num_dw);
@@ -783,31 +839,31 @@ static INLINE void eg_store_loop_const_seq(struct r600_command_buffer *cb, unsig
cb->buf[cb->num_dw++] = (reg - EG_LOOP_CONST_OFFSET) >> 2;
}
-static INLINE void r600_store_config_reg(struct r600_command_buffer *cb, unsigned reg, unsigned value)
+static inline void r600_store_config_reg(struct r600_command_buffer *cb, unsigned reg, unsigned value)
{
r600_store_config_reg_seq(cb, reg, 1);
r600_store_value(cb, value);
}
-static INLINE void r600_store_context_reg(struct r600_command_buffer *cb, unsigned reg, unsigned value)
+static inline void r600_store_context_reg(struct r600_command_buffer *cb, unsigned reg, unsigned value)
{
r600_store_context_reg_seq(cb, reg, 1);
r600_store_value(cb, value);
}
-static INLINE void r600_store_ctl_const(struct r600_command_buffer *cb, unsigned reg, unsigned value)
+static inline void r600_store_ctl_const(struct r600_command_buffer *cb, unsigned reg, unsigned value)
{
r600_store_ctl_const_seq(cb, reg, 1);
r600_store_value(cb, value);
}
-static INLINE void r600_store_loop_const(struct r600_command_buffer *cb, unsigned reg, unsigned value)
+static inline void r600_store_loop_const(struct r600_command_buffer *cb, unsigned reg, unsigned value)
{
r600_store_loop_const_seq(cb, reg, 1);
r600_store_value(cb, value);
}
-static INLINE void eg_store_loop_const(struct r600_command_buffer *cb, unsigned reg, unsigned value)
+static inline void eg_store_loop_const(struct r600_command_buffer *cb, unsigned reg, unsigned value)
{
eg_store_loop_const_seq(cb, reg, 1);
r600_store_value(cb, value);
@@ -816,28 +872,28 @@ static INLINE void eg_store_loop_const(struct r600_command_buffer *cb, unsigned
void r600_init_command_buffer(struct r600_command_buffer *cb, unsigned num_dw);
void r600_release_command_buffer(struct r600_command_buffer *cb);
-static INLINE void r600_write_compute_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void r600_write_compute_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
r600_write_context_reg_seq(cs, reg, num);
/* Set the compute bit on the packet header */
cs->buf[cs->cdw - 2] |= RADEON_CP_PACKET3_COMPUTE_MODE;
}
-static INLINE void r600_write_ctl_const_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void r600_write_ctl_const_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg >= R600_CTL_CONST_OFFSET);
- assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS);
+ assert(cs->cdw+2+num <= cs->max_dw);
cs->buf[cs->cdw++] = PKT3(PKT3_SET_CTL_CONST, num, 0);
cs->buf[cs->cdw++] = (reg - R600_CTL_CONST_OFFSET) >> 2;
}
-static INLINE void r600_write_compute_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void r600_write_compute_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
r600_write_compute_context_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
}
-static INLINE void r600_write_context_reg_flag(struct radeon_winsys_cs *cs, unsigned reg, unsigned value, unsigned flag)
+static inline void r600_write_context_reg_flag(struct radeon_winsys_cs *cs, unsigned reg, unsigned value, unsigned flag)
{
if (flag & RADEON_CP_PACKET3_COMPUTE_MODE) {
r600_write_compute_context_reg(cs, reg, value);
@@ -846,7 +902,7 @@ static INLINE void r600_write_context_reg_flag(struct radeon_winsys_cs *cs, unsi
}
}
-static INLINE void r600_write_ctl_const(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void r600_write_ctl_const(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
r600_write_ctl_const_seq(cs, reg, 1);
radeon_emit(cs, value);
@@ -855,21 +911,21 @@ static INLINE void r600_write_ctl_const(struct radeon_winsys_cs *cs, unsigned re
/*
* common helpers
*/
-static INLINE uint32_t S_FIXED(float value, uint32_t frac_bits)
+static inline uint32_t S_FIXED(float value, uint32_t frac_bits)
{
return value * (1 << frac_bits);
}
#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y))
/* 12.4 fixed-point */
-static INLINE unsigned r600_pack_float_12p4(float x)
+static inline unsigned r600_pack_float_12p4(float x)
{
return x <= 0 ? 0 :
x >= 4096 ? 0xffff : x * 16;
}
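/* Editor's note: a quick self-contained check of the fixed-point helpers
 * above. S_FIXED with 4 fractional bits multiplies by 16, and
 * r600_pack_float_12p4 additionally clamps to the 12.4 range [0, 0xffff].
 * The local copies below mirror those two functions. */
#include <assert.h>
#include <stdint.h>

static uint32_t s_fixed(float value, uint32_t frac_bits)
{
	return value * (1 << frac_bits);
}

static unsigned pack_12p4(float x)
{
	return x <= 0 ? 0 : x >= 4096 ? 0xffff : x * 16;
}

int main(void)
{
	assert(s_fixed(2.5f, 4) == 40);       /* 2.5 * 16 */
	assert(pack_12p4(2.5f) == 40);
	assert(pack_12p4(-1.0f) == 0);        /* clamped low */
	assert(pack_12p4(5000.0f) == 0xffff); /* clamped high */
	return 0;
}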
/* Return if the depth format can be read without the DB->CB copy on r6xx-r7xx. */
-static INLINE bool r600_can_read_depth(struct r600_texture *rtex)
+static inline bool r600_can_read_depth(struct r600_texture *rtex)
{
return rtex->resource.b.b.nr_samples <= 1 &&
(rtex->resource.b.b.format == PIPE_FORMAT_Z16_UNORM ||
@@ -880,7 +936,7 @@ static INLINE bool r600_can_read_depth(struct r600_texture *rtex)
#define V_028A6C_OUTPRIM_TYPE_LINESTRIP 1
#define V_028A6C_OUTPRIM_TYPE_TRISTRIP 2
-static INLINE unsigned r600_conv_prim_to_gs_out(unsigned mode)
+static inline unsigned r600_conv_prim_to_gs_out(unsigned mode)
{
static const int prim_conv[] = {
V_028A6C_OUTPRIM_TYPE_POINTLIST,
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index af7622e9b34..8d1f95abddc 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -310,6 +310,7 @@ struct r600_shader_ctx {
int gs_next_vertex;
struct r600_shader *gs_for_vs;
int gs_export_gpr_treg;
+ unsigned enabled_stream_buffers_mask;
};
struct r600_shader_tgsi_instruction {
@@ -1402,6 +1403,9 @@ static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output
* with MEM_STREAM instructions */
output.array_size = 0xFFF;
output.comp_mask = ((1 << so->output[i].num_components) - 1) << so->output[i].start_component;
+
+ ctx->enabled_stream_buffers_mask |= (1 << so->output[i].output_buffer);
+
if (ctx->bc->chip_class >= EVERGREEN) {
switch (so->output[i].output_buffer) {
case 0:
@@ -1718,6 +1722,8 @@ static int generate_gs_copy_shader(struct r600_context *rctx,
gs->gs_copy_shader = cshader;
ctx.bc->nstack = 1;
+
+ cshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask;
cshader->shader.ring_item_size = ocnt * 16;
return r600_bytecode_build(ctx.bc);
@@ -1931,15 +1937,14 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
+ ctx.bc->index_reg[0] = ctx.bc->ar_reg + 1;
+ ctx.bc->index_reg[1] = ctx.bc->ar_reg + 2;
+
if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
- ctx.gs_export_gpr_treg = ctx.bc->ar_reg + 1;
- ctx.temp_reg = ctx.bc->ar_reg + 2;
- ctx.bc->index_reg[0] = ctx.bc->ar_reg + 3;
- ctx.bc->index_reg[1] = ctx.bc->ar_reg + 4;
+ ctx.gs_export_gpr_treg = ctx.bc->ar_reg + 3;
+ ctx.temp_reg = ctx.bc->ar_reg + 4;
} else {
- ctx.temp_reg = ctx.bc->ar_reg + 1;
- ctx.bc->index_reg[0] = ctx.bc->ar_reg + 2;
- ctx.bc->index_reg[1] = ctx.bc->ar_reg + 3;
+ ctx.temp_reg = ctx.bc->ar_reg + 3;
}
shader->max_arrays = 0;
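/* Editor's note: the hunk above fixes index_reg[0]/[1] at ar_reg+1/+2 for
 * every shader type, where previously their position depended on whether the
 * GS-specific registers were allocated first. A trivial standalone check of
 * the new layout, with an illustrative base register: */
#include <assert.h>

int main(void)
{
	int ar_reg = 10;                      /* illustrative base */
	int index_reg0 = ar_reg + 1;          /* same slot for GS and non-GS */
	int index_reg1 = ar_reg + 2;
	int gs_export_gpr_treg = ar_reg + 3;  /* GS path */
	int gs_temp_reg = ar_reg + 4;
	int temp_reg = ar_reg + 3;            /* non-GS path */

	assert(index_reg0 == 11 && index_reg1 == 12);
	assert(gs_export_gpr_treg == 13 && gs_temp_reg == 14);
	assert(temp_reg == 13);
	return 0;
}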
@@ -2086,7 +2091,6 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
radeon_llvm_ctx.chip_class = ctx.bc->chip_class;
radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN);
radeon_llvm_ctx.stream_outputs = &so;
- radeon_llvm_ctx.clip_vertex = ctx.cv_output;
radeon_llvm_ctx.alpha_to_one = key.alpha_to_one;
radeon_llvm_ctx.has_compressed_msaa_texturing =
ctx.bc->has_compressed_msaa_texturing;
@@ -2262,6 +2266,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
so.num_outputs && !use_llvm)
emit_streamout(&ctx, &so);
+ pipeshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask;
convert_edgeflag_to_int(&ctx);
if (ring_outputs) {
@@ -2485,6 +2490,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
output[j].array_base = 0;
output[j].op = CF_OP_EXPORT;
j++;
+ shader->nr_ps_color_exports++;
}
noutput = j;
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index dd359d7e959..5d05c8153d7 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -125,6 +125,7 @@ struct r600_pipe_shader {
struct r600_shader_key key;
unsigned db_shader_control;
unsigned ps_depth_export;
+ unsigned enabled_stream_buffers_mask;
};
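/* Editor's note: enabled_stream_buffers_mask is a plain bitmask with one bit
 * per stream-output buffer the shader actually writes; emit_streamout in
 * r600_shader.c sets it via 1 << output_buffer, and it is presumably consumed
 * when enabling streamout for the bound shader. A self-contained sketch of
 * building and testing such a mask, with illustrative buffer indices: */
#include <assert.h>

int main(void)
{
	unsigned mask = 0;
	int outputs[] = { 0, 2, 2 };  /* output_buffer per streamout output */

	for (unsigned i = 0; i < 3; i++)
		mask |= 1u << outputs[i];

	assert(mask == 0x5);          /* buffers 0 and 2 enabled */
	assert(mask & (1u << 2));
	assert(!(mask & (1u << 1)));
	return 0;
}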
/* return the table index 0-5 for TGSI_INTERPOLATE_LINEAR/PERSPECTIVE and
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 960dfcedfef..5cc2283792d 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -473,7 +473,7 @@ static void *r600_create_rs_state(struct pipe_context *ctx,
/* offset */
rs->offset_units = state->offset_units;
- rs->offset_scale = state->offset_scale * 12.0f;
+ rs->offset_scale = state->offset_scale * 16.0f;
rs->offset_enable = state->offset_point || state->offset_line || state->offset_tri;
if (state->point_size_per_vertex) {
@@ -802,7 +802,7 @@ static void r600_set_scissor_states(struct pipe_context *ctx,
return;
for (i = start_slot ; i < start_slot + num_scissors; i++) {
- rctx->scissor[i].atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->scissor[i].atom);
}
}
@@ -1193,7 +1193,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
if (rctx->alphatest_state.bypass != alphatest_bypass) {
rctx->alphatest_state.bypass = alphatest_bypass;
- rctx->alphatest_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom);
}
}
@@ -1209,28 +1209,28 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
if (state->zsbuf->format != rctx->poly_offset_state.zs_format) {
rctx->poly_offset_state.zs_format = state->zsbuf->format;
- rctx->poly_offset_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->poly_offset_state.atom);
}
if (rctx->db_state.rsurf != surf) {
rctx->db_state.rsurf = surf;
- rctx->db_state.atom.dirty = true;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_state.atom);
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
} else if (rctx->db_state.rsurf) {
rctx->db_state.rsurf = NULL;
- rctx->db_state.atom.dirty = true;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_state.atom);
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
if (rctx->cb_misc_state.nr_cbufs != state->nr_cbufs) {
rctx->cb_misc_state.nr_cbufs = state->nr_cbufs;
- rctx->cb_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
}
if (state->nr_cbufs == 0 && rctx->alphatest_state.bypass) {
rctx->alphatest_state.bypass = false;
- rctx->alphatest_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom);
}
/* Calculate the CS size. */
@@ -1250,7 +1250,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
rctx->framebuffer.atom.num_dw += 2;
}
- rctx->framebuffer.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom);
r600_set_sample_locations_constant_buffer(rctx);
}
@@ -1541,9 +1541,9 @@ static void r600_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
rctx->ps_iter_samples = min_samples;
if (rctx->framebuffer.nr_samples > 1) {
- rctx->rasterizer_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->rasterizer_state.atom);
if (rctx->b.chip_class == R600)
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
}
@@ -2089,7 +2089,7 @@ bool r600_adjust_gprs(struct r600_context *rctx)
if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp || rctx->config_state.sq_gpr_resource_mgmt_2 != tmp2) {
rctx->config_state.sq_gpr_resource_mgmt_1 = tmp;
rctx->config_state.sq_gpr_resource_mgmt_2 = tmp2;
- rctx->config_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->config_state.atom);
rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE;
}
return true;
@@ -2796,11 +2796,11 @@ void r600_update_db_shader_control(struct r600_context * rctx)
if (db_shader_control != rctx->db_misc_state.db_shader_control) {
rctx->db_misc_state.db_shader_control = db_shader_control;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
}
-static INLINE unsigned r600_array_mode(unsigned mode)
+static inline unsigned r600_array_mode(unsigned mode)
{
switch (mode) {
case RADEON_SURF_MODE_LINEAR_ALIGNED: return V_0280A0_ARRAY_LINEAR_ALIGNED;
@@ -3074,8 +3074,8 @@ void r600_init_state_functions(struct r600_context *rctx)
r600_init_atom(rctx, &rctx->config_state.atom, id++, r600_emit_config_state, 3);
r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4);
r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, r600_emit_vertex_fetch_shader, 5);
- rctx->atoms[id++] = &rctx->b.streamout.begin_atom;
- rctx->atoms[id++] = &rctx->b.streamout.enable_atom;
+ r600_add_atom(rctx, &rctx->b.streamout.begin_atom, id++);
+ r600_add_atom(rctx, &rctx->b.streamout.enable_atom, id++);
r600_init_atom(rctx, &rctx->vertex_shader.atom, id++, r600_emit_shader, 23);
r600_init_atom(rctx, &rctx->pixel_shader.atom, id++, r600_emit_shader, 0);
r600_init_atom(rctx, &rctx->geometry_shader.atom, id++, r600_emit_shader, 0);
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 13dc9ee8c10..aa4a8d0240f 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -47,18 +47,26 @@ void r600_release_command_buffer(struct r600_command_buffer *cb)
FREE(cb->buf);
}
+void r600_add_atom(struct r600_context *rctx,
+ struct r600_atom *atom,
+ unsigned id)
+{
+ assert(id < R600_NUM_ATOMS);
+ assert(rctx->atoms[id] == NULL);
+ rctx->atoms[id] = atom;
+ atom->id = id;
+ atom->dirty = false;
+}
+
void r600_init_atom(struct r600_context *rctx,
struct r600_atom *atom,
unsigned id,
void (*emit)(struct r600_context *ctx, struct r600_atom *state),
unsigned num_dw)
{
- assert(id < R600_NUM_ATOMS);
- assert(rctx->atoms[id] == NULL);
- rctx->atoms[id] = atom;
atom->emit = (void*)emit;
atom->num_dw = num_dw;
- atom->dirty = false;
+ r600_add_atom(rctx, atom, id);
}
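
[Editor's note] The hunk above splits atom setup in two: r600_add_atom() only registers the atom in the context's slot table and assigns its id, while r600_init_atom() layers the emit callback and command-stream size on top and delegates to it. This is what lets the streamout begin/enable atoms, whose emit callbacks are owned by the common code, be registered through r600_add_atom() instead of being poked directly into rctx->atoms[]. A minimal standalone sketch of the pattern, with simplified stand-in types (the real ones live in the r600 headers):

#include <assert.h>
#include <stddef.h>

#define NUM_ATOMS 64                    /* stand-in for R600_NUM_ATOMS */

struct atom {
	void (*emit)(void *ctx, struct atom *a);
	unsigned num_dw;                /* worst-case CS size in dwords */
	unsigned short id;              /* slot index */
	int dirty;
};

struct context { struct atom *atoms[NUM_ATOMS]; };

/* Registration only: claim the slot, remember the id. */
static void add_atom(struct context *ctx, struct atom *a, unsigned id)
{
	assert(id < NUM_ATOMS);
	assert(ctx->atoms[id] == NULL);
	ctx->atoms[id] = a;
	a->id = (unsigned short)id;
	a->dirty = 0;
}

/* Full init: set the emit callback and size, then register. */
static void init_atom(struct context *ctx, struct atom *a, unsigned id,
                      void (*emit)(void *, struct atom *), unsigned num_dw)
{
	a->emit = emit;
	a->num_dw = num_dw;
	add_atom(ctx, a, id);
}
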
void r600_emit_cso_state(struct r600_context *rctx, struct r600_atom *atom)
@@ -127,11 +135,11 @@ static void r600_bind_blend_state_internal(struct r600_context *rctx,
rctx->dual_src_blend = blend->dual_src_blend;
if (!blend_disable) {
- r600_set_cso_state_with_cb(&rctx->blend_state, blend, &blend->buffer);
+ r600_set_cso_state_with_cb(rctx, &rctx->blend_state, blend, &blend->buffer);
color_control = blend->cb_color_control;
} else {
/* Blending is disabled. */
- r600_set_cso_state_with_cb(&rctx->blend_state, blend, &blend->buffer_no_blend);
+ r600_set_cso_state_with_cb(rctx, &rctx->blend_state, blend, &blend->buffer_no_blend);
color_control = blend->cb_color_control_no_blend;
}
@@ -150,7 +158,7 @@ static void r600_bind_blend_state_internal(struct r600_context *rctx,
update_cb = true;
}
if (update_cb) {
- rctx->cb_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
}
}
@@ -160,7 +168,7 @@ static void r600_bind_blend_state(struct pipe_context *ctx, void *state)
struct r600_blend_state *blend = (struct r600_blend_state *)state;
if (blend == NULL) {
- r600_set_cso_state_with_cb(&rctx->blend_state, NULL, NULL);
+ r600_set_cso_state_with_cb(rctx, &rctx->blend_state, NULL, NULL);
return;
}
@@ -173,7 +181,7 @@ static void r600_set_blend_color(struct pipe_context *ctx,
struct r600_context *rctx = (struct r600_context *)ctx;
rctx->blend_color.state = *state;
- rctx->blend_color.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->blend_color.atom);
}
void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom)
@@ -210,7 +218,7 @@ static void r600_set_clip_state(struct pipe_context *ctx,
struct pipe_constant_buffer cb;
rctx->clip_state.state = *state;
- rctx->clip_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->clip_state.atom);
cb.buffer = NULL;
cb.user_buffer = state->ucp;
@@ -226,7 +234,7 @@ static void r600_set_stencil_ref(struct pipe_context *ctx,
struct r600_context *rctx = (struct r600_context *)ctx;
rctx->stencil_ref.state = *state;
- rctx->stencil_ref.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->stencil_ref.atom);
}
void r600_emit_stencil_ref(struct r600_context *rctx, struct r600_atom *atom)
@@ -274,11 +282,11 @@ static void r600_bind_dsa_state(struct pipe_context *ctx, void *state)
struct r600_stencil_ref ref;
if (state == NULL) {
- r600_set_cso_state_with_cb(&rctx->dsa_state, NULL, NULL);
+ r600_set_cso_state_with_cb(rctx, &rctx->dsa_state, NULL, NULL);
return;
}
- r600_set_cso_state_with_cb(&rctx->dsa_state, dsa, &dsa->buffer);
+ r600_set_cso_state_with_cb(rctx, &rctx->dsa_state, dsa, &dsa->buffer);
ref.ref_value[0] = rctx->stencil_ref.pipe_state.ref_value[0];
ref.ref_value[1] = rctx->stencil_ref.pipe_state.ref_value[1];
@@ -293,7 +301,7 @@ static void r600_bind_dsa_state(struct pipe_context *ctx, void *state)
* we are seeing lockups on evergreen, so do not enable
* hyperz when not writing the zbuffer
*/
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
}
@@ -304,7 +312,7 @@ static void r600_bind_dsa_state(struct pipe_context *ctx, void *state)
rctx->alphatest_state.sx_alpha_ref != dsa->alpha_ref) {
rctx->alphatest_state.sx_alpha_test_control = dsa->sx_alpha_test_control;
rctx->alphatest_state.sx_alpha_ref = dsa->alpha_ref;
- rctx->alphatest_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom);
}
}
@@ -318,14 +326,14 @@ static void r600_bind_rs_state(struct pipe_context *ctx, void *state)
rctx->rasterizer = rs;
- r600_set_cso_state_with_cb(&rctx->rasterizer_state, rs, &rs->buffer);
+ r600_set_cso_state_with_cb(rctx, &rctx->rasterizer_state, rs, &rs->buffer);
if (rs->offset_enable &&
(rs->offset_units != rctx->poly_offset_state.offset_units ||
rs->offset_scale != rctx->poly_offset_state.offset_scale)) {
rctx->poly_offset_state.offset_units = rs->offset_units;
rctx->poly_offset_state.offset_scale = rs->offset_scale;
- rctx->poly_offset_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->poly_offset_state.atom);
}
/* Update clip_misc_state. */
@@ -333,14 +341,14 @@ static void r600_bind_rs_state(struct pipe_context *ctx, void *state)
rctx->clip_misc_state.clip_plane_enable != rs->clip_plane_enable) {
rctx->clip_misc_state.pa_cl_clip_cntl = rs->pa_cl_clip_cntl;
rctx->clip_misc_state.clip_plane_enable = rs->clip_plane_enable;
- rctx->clip_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->clip_misc_state.atom);
}
/* Workaround for a missing scissor enable on r600. */
if (rctx->b.chip_class == R600 &&
rs->scissor_enable != rctx->scissor[0].enable) {
rctx->scissor[0].enable = rs->scissor_enable;
- rctx->scissor[0].atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->scissor[0].atom);
}
/* Re-emit PA_SC_LINE_STIPPLE. */
@@ -378,7 +386,7 @@ void r600_sampler_states_dirty(struct r600_context *rctx,
state->atom.num_dw =
util_bitcount(state->dirty_mask & state->has_bordercolor_mask) * 11 +
util_bitcount(state->dirty_mask & ~state->has_bordercolor_mask) * 5;
- state->atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &state->atom);
}
}
@@ -399,9 +407,9 @@ static void r600_bind_sampler_states(struct pipe_context *pipe,
assert(start == 0); /* XXX fix below */
- if (shader != PIPE_SHADER_VERTEX &&
- shader != PIPE_SHADER_FRAGMENT) {
- return;
+ if (!states) {
+ disable_mask = ~0u;
+ count = 0;
}
for (i = 0; i < count; i++) {
@@ -443,7 +451,7 @@ static void r600_bind_sampler_states(struct pipe_context *pipe,
/* change in TA_CNTL_AUX need a pipeline flush */
rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE;
rctx->seamless_cube_map.enabled = seamless_cube_map;
- rctx->seamless_cube_map.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->seamless_cube_map.atom);
}
}
@@ -483,7 +491,7 @@ static void r600_bind_vertex_elements(struct pipe_context *ctx, void *state)
{
struct r600_context *rctx = (struct r600_context *)ctx;
- r600_set_cso_state(&rctx->vertex_fetch_shader, state);
+ r600_set_cso_state(rctx, &rctx->vertex_fetch_shader, state);
}
static void r600_delete_vertex_elements(struct pipe_context *ctx, void *state)
@@ -513,7 +521,7 @@ void r600_vertex_buffers_dirty(struct r600_context *rctx)
rctx->b.flags |= R600_CONTEXT_INV_VERTEX_CACHE;
rctx->vertex_buffer_state.atom.num_dw = (rctx->b.chip_class >= EVERGREEN ? 12 : 11) *
util_bitcount(rctx->vertex_buffer_state.dirty_mask);
- rctx->vertex_buffer_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->vertex_buffer_state.atom);
}
}
@@ -570,7 +578,7 @@ void r600_sampler_views_dirty(struct r600_context *rctx,
rctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE;
state->atom.num_dw = (rctx->b.chip_class >= EVERGREEN ? 14 : 13) *
util_bitcount(state->dirty_mask);
- state->atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &state->atom);
}
}
@@ -593,9 +601,9 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader,
assert(start == 0); /* XXX fix below */
- if (shader == PIPE_SHADER_COMPUTE) {
- evergreen_set_cs_sampler_view(pipe, start, count, views);
- return;
+ if (!views) {
+ disable_mask = ~0u;
+ count = 0;
}
remaining_mask = dst->views.enabled_mask & disable_mask;
@@ -673,7 +681,7 @@ static void r600_set_viewport_states(struct pipe_context *ctx,
for (i = start_slot; i < start_slot + num_viewports; i++) {
rctx->viewport[i].state = state[i - start_slot];
- rctx->viewport[i].atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->viewport[i].atom);
}
}
@@ -694,7 +702,7 @@ void r600_emit_viewport_state(struct r600_context *rctx, struct r600_atom *atom)
}
/* Compute the key for the hw shader variant */
-static INLINE struct r600_shader_key r600_shader_selector_key(struct pipe_context * ctx,
+static inline struct r600_shader_key r600_shader_selector_key(struct pipe_context * ctx,
struct r600_pipe_shader_selector * sel)
{
struct r600_context *rctx = (struct r600_context *)ctx;
@@ -913,7 +921,7 @@ void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf
rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE;
state->atom.num_dw = rctx->b.chip_class >= EVERGREEN ? util_bitcount(state->dirty_mask)*20
: util_bitcount(state->dirty_mask)*19;
- state->atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &state->atom);
}
}
@@ -982,7 +990,7 @@ static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask
return;
rctx->sample_mask.sample_mask = sample_mask;
- rctx->sample_mask.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->sample_mask.atom);
}
/*
@@ -1107,27 +1115,28 @@ static void update_shader_atom(struct pipe_context *ctx,
struct r600_shader_state *state,
struct r600_pipe_shader *shader)
{
+ struct r600_context *rctx = (struct r600_context *)ctx;
+
state->shader = shader;
if (shader) {
state->atom.num_dw = shader->command_buffer.num_dw;
- state->atom.dirty = true;
r600_context_add_resource_size(ctx, (struct pipe_resource *)shader->bo);
} else {
state->atom.num_dw = 0;
- state->atom.dirty = false;
}
+ r600_mark_atom_dirty(rctx, &state->atom);
}
static void update_gs_block_state(struct r600_context *rctx, unsigned enable)
{
if (rctx->shader_stages.geom_enable != enable) {
rctx->shader_stages.geom_enable = enable;
- rctx->shader_stages.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->shader_stages.atom);
}
if (rctx->gs_rings.enable != enable) {
rctx->gs_rings.enable = enable;
- rctx->gs_rings.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->gs_rings.atom);
if (enable && !rctx->gs_rings.esgs_ring.buffer) {
unsigned size = 0x1C000;
@@ -1192,7 +1201,7 @@ static bool r600_update_derived_state(struct r600_context *rctx)
if (!rctx->shader_stages.geom_enable) {
rctx->shader_stages.geom_enable = true;
- rctx->shader_stages.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->shader_stages.atom);
}
/* gs_shader provides GS and VS (copy shader) */
@@ -1206,8 +1215,9 @@ static bool r600_update_derived_state(struct r600_context *rctx)
rctx->clip_misc_state.pa_cl_vs_out_cntl = rctx->gs_shader->current->gs_copy_shader->pa_cl_vs_out_cntl;
rctx->clip_misc_state.clip_dist_write = rctx->gs_shader->current->gs_copy_shader->shader.clip_dist_write;
rctx->clip_misc_state.clip_disable = rctx->gs_shader->current->shader.vs_position_window_space;
- rctx->clip_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->clip_misc_state.atom);
}
+ rctx->b.streamout.enabled_stream_buffers_mask = rctx->gs_shader->current->gs_copy_shader->enabled_stream_buffers_mask;
}
r600_shader_select(ctx, rctx->vs_shader, &vs_dirty);
@@ -1223,7 +1233,7 @@ static bool r600_update_derived_state(struct r600_context *rctx)
update_shader_atom(ctx, &rctx->geometry_shader, NULL);
update_shader_atom(ctx, &rctx->export_shader, NULL);
rctx->shader_stages.geom_enable = false;
- rctx->shader_stages.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->shader_stages.atom);
}
r600_shader_select(ctx, rctx->vs_shader, &vs_dirty);
@@ -1240,8 +1250,9 @@ static bool r600_update_derived_state(struct r600_context *rctx)
rctx->clip_misc_state.pa_cl_vs_out_cntl = rctx->vs_shader->current->pa_cl_vs_out_cntl;
rctx->clip_misc_state.clip_dist_write = rctx->vs_shader->current->shader.clip_dist_write;
rctx->clip_misc_state.clip_disable = rctx->vs_shader->current->shader.vs_position_window_space;
- rctx->clip_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->clip_misc_state.atom);
}
+ rctx->b.streamout.enabled_stream_buffers_mask = rctx->vs_shader->current->enabled_stream_buffers_mask;
}
}
@@ -1252,7 +1263,7 @@ static bool r600_update_derived_state(struct r600_context *rctx)
if (rctx->cb_misc_state.nr_ps_color_outputs != rctx->ps_shader->current->nr_ps_color_outputs) {
rctx->cb_misc_state.nr_ps_color_outputs = rctx->ps_shader->current->nr_ps_color_outputs;
- rctx->cb_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
}
if (rctx->b.chip_class <= R700) {
@@ -1260,7 +1271,7 @@ static bool r600_update_derived_state(struct r600_context *rctx)
if (rctx->cb_misc_state.multiwrite != multiwrite) {
rctx->cb_misc_state.multiwrite = multiwrite;
- rctx->cb_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
}
}
@@ -1274,7 +1285,7 @@ static bool r600_update_derived_state(struct r600_context *rctx)
r600_update_ps_state(ctx, rctx->ps_shader->current);
}
- rctx->shader_stages.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->shader_stages.atom);
update_shader_atom(ctx, &rctx->pixel_shader, rctx->ps_shader->current);
}
@@ -1409,7 +1420,6 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
data += info.indirect_offset / sizeof(unsigned);
start = data[2] * ib.index_size;
count = data[0];
- rctx->b.ws->buffer_unmap(indirect_resource->cs_buf);
}
else {
start = 0;
@@ -1454,24 +1464,23 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
rctx->vgt_state.vgt_multi_prim_ib_reset_en = info.primitive_restart;
rctx->vgt_state.vgt_multi_prim_ib_reset_indx = info.restart_index;
rctx->vgt_state.vgt_indx_offset = info.index_bias;
- rctx->vgt_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->vgt_state.atom);
}
/* Workaround for hardware deadlock on certain R600 ASICs: write into a CB register. */
if (rctx->b.chip_class == R600) {
rctx->b.flags |= R600_CONTEXT_PS_PARTIAL_FLUSH;
- rctx->cb_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
}
/* Emit states. */
r600_need_cs_space(rctx, ib.user_buffer ? 5 : 0, TRUE);
r600_flush_emit(rctx);
- for (i = 0; i < R600_NUM_ATOMS; i++) {
- if (rctx->atoms[i] == NULL || !rctx->atoms[i]->dirty) {
- continue;
- }
+ i = r600_next_dirty_atom(rctx, 0);
+ while (i < R600_NUM_ATOMS) {
r600_emit_atom(rctx, rctx->atoms[i]);
+ i = r600_next_dirty_atom(rctx, i + 1);
}
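
[Editor's note] r600_next_dirty_atom()'s implementation is not part of this excerpt; given the new atom->id field and the set_atom_dirty() hook added later in this diff, it presumably walks a per-context dirty bitmask rather than scanning every slot as the removed loop did. A hypothetical sketch of that iteration, assuming a 64-bit mask kept in sync by r600_mark_atom_dirty():

#include <stdint.h>

/* Return the index of the first dirty atom at or after 'start',
 * or 64 (standing in for R600_NUM_ATOMS) when none remain. */
static unsigned next_dirty_atom(uint64_t dirty_mask, unsigned start)
{
	if (start >= 64)
		return 64;
	uint64_t remaining = dirty_mask >> start;
	if (!remaining)
		return 64;
	return start + (unsigned)__builtin_ctzll(remaining); /* GCC/Clang */
}

/* Usage mirrors the new draw loop:
 *   for (i = next_dirty_atom(m, 0); i < 64; i = next_dirty_atom(m, i + 1))
 *           emit_atom(ctx, atoms[i]);
 */
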
if (rctx->b.chip_class == CAYMAN) {
@@ -2490,7 +2499,7 @@ static void r600_set_occlusion_query_state(struct pipe_context *ctx, bool enable
if (rctx->db_misc_state.occlusion_query_enabled != enable) {
rctx->db_misc_state.occlusion_query_enabled = enable;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
}
diff --git a/src/gallium/drivers/r600/sb/sb_sched.cpp b/src/gallium/drivers/r600/sb/sb_sched.cpp
index 2e38a62c05a..62680788c5e 100644
--- a/src/gallium/drivers/r600/sb/sb_sched.cpp
+++ b/src/gallium/drivers/r600/sb/sb_sched.cpp
@@ -489,7 +489,7 @@ bool alu_group_tracker::try_reserve(alu_node* n) {
n->bc.bank_swizzle = 0;
- if (!trans & fbs)
+ if (!trans && fbs)
n->bc.bank_swizzle = VEC_210;
if (gpr.try_reserve(n)) {
diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
index fc5f6c29870..cb9809f2449 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -84,7 +84,7 @@ void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
}
}
- if (busy || ctx->ws->buffer_is_busy(resource->buf, rusage)) {
+ if (busy || !ctx->ws->buffer_wait(resource->buf, 0, rusage)) {
if (usage & PIPE_TRANSFER_DONTBLOCK) {
return NULL;
} else {
@@ -121,7 +121,8 @@ bool r600_init_resource(struct r600_common_screen *rscreen,
/* Older kernels didn't always flush the HDP cache before
* CS execution
*/
- if (rscreen->info.drm_minor < 40) {
+ if (rscreen->info.drm_major == 2 &&
+ rscreen->info.drm_minor < 40) {
res->domains = RADEON_DOMAIN_GTT;
flags |= RADEON_FLAG_GTT_WC;
break;
@@ -147,7 +148,8 @@ bool r600_init_resource(struct r600_common_screen *rscreen,
* Write-combined CPU mappings are fine, the kernel ensures all CPU
* writes finish before the GPU executes a command stream.
*/
- if (rscreen->info.drm_minor < 40)
+ if (rscreen->info.drm_major == 2 &&
+ rscreen->info.drm_minor < 40)
res->domains = RADEON_DOMAIN_GTT;
else if (res->domains & RADEON_DOMAIN_VRAM)
flags |= RADEON_FLAG_CPU_ACCESS;
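
[Editor's note] The added drm_major == 2 guards keep these legacy-kernel workarounds from matching on a newer DRM major version, where the minor number restarts from zero (the diff itself handles drm_major == 3 in the driver-query and CS-trace hunks below). A bare "drm_minor < 40" comparison would otherwise re-enable the old-kernel path on such kernels.
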
@@ -161,6 +163,9 @@ bool r600_init_resource(struct r600_common_screen *rscreen,
flags |= RADEON_FLAG_NO_CPU_ACCESS;
}
+ if (rscreen->debug_flags & DBG_NO_WC)
+ flags &= ~RADEON_FLAG_GTT_WC;
+
/* Allocate a new resource. */
new_buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment,
use_reusable_pool,
@@ -274,7 +279,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
/* Check if mapping this buffer would cause waiting for the GPU. */
if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
- rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
+ !rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
rctx->invalidate_buffer(&rctx->b, &rbuffer->b.b);
}
/* At this point, the buffer is always idle. */
@@ -288,7 +293,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
/* Check if mapping this buffer would cause waiting for the GPU. */
if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
- rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
+ !rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
/* Do a wait-free write-only transfer using a temporary buffer. */
unsigned offset;
struct r600_resource *staging = NULL;
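
[Editor's note] The buffer_is_busy() -> buffer_wait() conversions in this file all follow one pattern: with a zero timeout, buffer_wait() acts as a non-blocking poll and returns true once the buffer is idle, so every "is busy" test becomes a negated wait. A sketch of the equivalence, reusing the winsys types from the calls above and assuming that return-value contract:

#include <stdbool.h>

/* before:  ws->buffer_is_busy(buf, usage)
 * after:  !ws->buffer_wait(buf, 0, usage)
 */
static inline bool buffer_is_busy_compat(struct radeon_winsys *ws,
                                         struct pb_buffer *buf,
                                         enum radeon_bo_usage usage)
{
	return !ws->buffer_wait(buf, 0 /* timeout */, usage);
}

Folding the timeout into a single entry point lets callers choose between polling (0), bounded waits, and an infinite wait without a second vtable slot.
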
diff --git a/src/gallium/drivers/radeon/r600_cs.h b/src/gallium/drivers/radeon/r600_cs.h
index b51eebbc68e..03a04b754d6 100644
--- a/src/gallium/drivers/radeon/r600_cs.h
+++ b/src/gallium/drivers/radeon/r600_cs.h
@@ -33,7 +33,7 @@
#include "r600_pipe_common.h"
#include "r600d_common.h"
-static INLINE unsigned r600_context_bo_reloc(struct r600_common_context *rctx,
+static inline unsigned r600_context_bo_reloc(struct r600_common_context *rctx,
struct r600_ring *ring,
struct r600_resource *rbo,
enum radeon_bo_usage usage,
@@ -59,7 +59,7 @@ static INLINE unsigned r600_context_bo_reloc(struct r600_common_context *rctx,
rbo->domains, priority) * 4;
}
-static INLINE void r600_emit_reloc(struct r600_common_context *rctx,
+static inline void r600_emit_reloc(struct r600_common_context *rctx,
struct r600_ring *ring, struct r600_resource *rbo,
enum radeon_bo_usage usage,
enum radeon_bo_priority priority)
@@ -74,57 +74,57 @@ static INLINE void r600_emit_reloc(struct r600_common_context *rctx,
}
}
-static INLINE void r600_write_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void r600_write_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg < R600_CONTEXT_REG_OFFSET);
- assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS);
+ assert(cs->cdw+2+num <= cs->max_dw);
radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0));
radeon_emit(cs, (reg - R600_CONFIG_REG_OFFSET) >> 2);
}
-static INLINE void r600_write_config_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void r600_write_config_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
r600_write_config_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
}
-static INLINE void r600_write_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void r600_write_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg >= R600_CONTEXT_REG_OFFSET);
- assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS);
+ assert(cs->cdw+2+num <= cs->max_dw);
radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0));
radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2);
}
-static INLINE void r600_write_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void r600_write_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
r600_write_context_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
}
-static INLINE void si_write_sh_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void si_write_sh_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
- assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS);
+ assert(cs->cdw+2+num <= cs->max_dw);
radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0));
radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2);
}
-static INLINE void si_write_sh_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void si_write_sh_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
si_write_sh_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
}
-static INLINE void cik_write_uconfig_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void cik_write_uconfig_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
- assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS);
+ assert(cs->cdw+2+num <= cs->max_dw);
radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0));
radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
}
-static INLINE void cik_write_uconfig_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void cik_write_uconfig_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
cik_write_uconfig_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 3def4446882..ed5d1dabdc3 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -108,9 +108,9 @@ void r600_draw_rectangle(struct blitter_context *blitter,
void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw)
{
/* Flush if there's not enough space. */
- if ((num_dw + ctx->rings.dma.cs->cdw) > RADEON_MAX_CMDBUF_DWORDS) {
+ if ((num_dw + ctx->rings.dma.cs->cdw) > ctx->rings.dma.cs->max_dw) {
ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
- assert((num_dw + ctx->rings.dma.cs->cdw) <= RADEON_MAX_CMDBUF_DWORDS);
+ assert((num_dw + ctx->rings.dma.cs->cdw) <= ctx->rings.dma.cs->max_dw);
}
}
@@ -132,10 +132,11 @@ void r600_preflush_suspend_features(struct r600_common_context *ctx)
}
/* suspend queries */
- ctx->nontimer_queries_suspended = false;
+ ctx->queries_suspended_for_flush = false;
if (ctx->num_cs_dw_nontimer_queries_suspend) {
r600_suspend_nontimer_queries(ctx);
- ctx->nontimer_queries_suspended = true;
+ r600_suspend_timer_queries(ctx);
+ ctx->queries_suspended_for_flush = true;
}
ctx->streamout.suspended = false;
@@ -153,8 +154,9 @@ void r600_postflush_resume_features(struct r600_common_context *ctx)
}
/* resume queries */
- if (ctx->nontimer_queries_suspended) {
+ if (ctx->queries_suspended_for_flush) {
r600_resume_nontimer_queries(ctx);
+ r600_resume_timer_queries(ctx);
}
/* Re-enable render condition. */
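
[Editor's note] The flag rename reflects that the flush path now parks both query lists, not just the nontimer one. Schematically, as wired above (and in r600_query.c below):

/* preflush:
 *     queries_suspended_for_flush = false;
 *     if (num_cs_dw_nontimer_queries_suspend) {
 *             suspend nontimer queries;
 *             suspend timer queries;   // new: timer queries must not span IBs
 *             queries_suspended_for_flush = true;
 *     }
 * postflush:
 *     if (queries_suspended_for_flush) {
 *             resume nontimer queries;
 *             resume timer queries;
 *     }
 */
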
@@ -196,6 +198,19 @@ static void r600_flush_dma_ring(void *ctx, unsigned flags,
rctx->rings.dma.flushing = false;
}
+static enum pipe_reset_status r600_get_reset_status(struct pipe_context *ctx)
+{
+ struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+ unsigned latest = rctx->ws->query_value(rctx->ws,
+ RADEON_GPU_RESET_COUNTER);
+
+ if (rctx->gpu_reset_counter == latest)
+ return PIPE_NO_RESET;
+
+ rctx->gpu_reset_counter = latest;
+ return PIPE_UNKNOWN_CONTEXT_RESET;
+}
+
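[Editor's note] r600_get_reset_status() compares a counter cached at context creation (see the init hunk just below, gated on DRM 2.43+) against the kernel's global RADEON_GPU_RESET_COUNTER; any change is reported as PIPE_UNKNOWN_CONTEXT_RESET because the query cannot attribute the reset to this particular context. A hypothetical caller, e.g. a state tracker implementing robustness-style loss detection:

/* Hypothetical polling loop in a state tracker. */
enum pipe_reset_status status = pipe->get_device_reset_status(pipe);
if (status != PIPE_NO_RESET) {
	/* A GPU reset happened since the last poll; tear down and
	 * recreate the pipe_context, since its state is unreliable. */
}
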
bool r600_common_context_init(struct r600_common_context *rctx,
struct r600_common_screen *rscreen)
{
@@ -222,6 +237,13 @@ bool r600_common_context_init(struct r600_common_context *rctx,
rctx->b.memory_barrier = r600_memory_barrier;
rctx->b.flush = r600_flush_from_st;
+ if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 43) {
+ rctx->b.get_device_reset_status = r600_get_reset_status;
+ rctx->gpu_reset_counter =
+ rctx->ws->query_value(rctx->ws,
+ RADEON_GPU_RESET_COUNTER);
+ }
+
LIST_INITHEAD(&rctx->texture_buffers);
r600_init_context_texture_functions(rctx);
@@ -240,8 +262,12 @@ bool r600_common_context_init(struct r600_common_context *rctx,
if (!rctx->uploader)
return false;
+ rctx->ctx = rctx->ws->ctx_create(rctx->ws);
+ if (!rctx->ctx)
+ return false;
+
if (rscreen->info.r600_has_dma && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) {
- rctx->rings.dma.cs = rctx->ws->cs_create(rctx->ws, RING_DMA,
+ rctx->rings.dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA,
r600_flush_dma_ring,
rctx, NULL);
rctx->rings.dma.flush = r600_flush_dma_ring;
@@ -252,12 +278,12 @@ bool r600_common_context_init(struct r600_common_context *rctx,
void r600_common_context_cleanup(struct r600_common_context *rctx)
{
- if (rctx->rings.gfx.cs) {
+ if (rctx->rings.gfx.cs)
rctx->ws->cs_destroy(rctx->rings.gfx.cs);
- }
- if (rctx->rings.dma.cs) {
+ if (rctx->rings.dma.cs)
rctx->ws->cs_destroy(rctx->rings.dma.cs);
- }
+ if (rctx->ctx)
+ rctx->ws->ctx_destroy(rctx->ctx);
if (rctx->uploader) {
u_upload_destroy(rctx->uploader);
@@ -313,6 +339,11 @@ static const struct debug_named_value common_debug_options[] = {
{ "gs", DBG_GS, "Print geometry shaders" },
{ "ps", DBG_PS, "Print pixel shaders" },
{ "cs", DBG_CS, "Print compute shaders" },
+ { "tcs", DBG_TCS, "Print tessellation control shaders" },
+ { "tes", DBG_TES, "Print tessellation evaluation shaders" },
+ { "noir", DBG_NO_IR, "Don't print the LLVM IR"},
+ { "notgsi", DBG_NO_TGSI, "Don't print the TGSI"},
+ { "noasm", DBG_NO_ASM, "Don't print disassembled shaders"},
/* features */
{ "nodma", DBG_NO_ASYNC_DMA, "Disable asynchronous DMA" },
@@ -324,6 +355,7 @@ static const struct debug_named_value common_debug_options[] = {
{ "switch_on_eop", DBG_SWITCH_ON_EOP, "Program WD/IA to switch on end-of-packet." },
{ "forcedma", DBG_FORCE_DMA, "Use asynchronous DMA for all operations when possible." },
{ "precompile", DBG_PRECOMPILE, "Compile one shader variant at shader creation." },
+ { "nowc", DBG_NO_WC, "Disable GTT write combining" },
DEBUG_NAMED_VALUE_END /* must be last */
};
@@ -338,11 +370,9 @@ static const char* r600_get_device_vendor(struct pipe_screen* pscreen)
return "AMD";
}
-static const char* r600_get_name(struct pipe_screen* pscreen)
+static const char* r600_get_chip_name(struct r600_common_screen *rscreen)
{
- struct r600_common_screen *rscreen = (struct r600_common_screen*)pscreen;
-
- switch (rscreen->family) {
+ switch (rscreen->info.family) {
case CHIP_R600: return "AMD R600";
case CHIP_RV610: return "AMD RV610";
case CHIP_RV630: return "AMD RV630";
@@ -378,10 +408,21 @@ static const char* r600_get_name(struct pipe_screen* pscreen)
case CHIP_KABINI: return "AMD KABINI";
case CHIP_HAWAII: return "AMD HAWAII";
case CHIP_MULLINS: return "AMD MULLINS";
+ case CHIP_TONGA: return "AMD TONGA";
+ case CHIP_ICELAND: return "AMD ICELAND";
+ case CHIP_CARRIZO: return "AMD CARRIZO";
+ case CHIP_FIJI: return "AMD FIJI";
default: return "AMD unknown";
}
}
+static const char* r600_get_name(struct pipe_screen* pscreen)
+{
+ struct r600_common_screen *rscreen = (struct r600_common_screen*)pscreen;
+
+ return rscreen->renderer_string;
+}
+
static float r600_get_paramf(struct pipe_screen* pscreen,
enum pipe_capf param)
{
@@ -495,6 +536,10 @@ const char *r600_get_llvm_processor_name(enum radeon_family family)
#else
return "kabini";
#endif
+ case CHIP_TONGA: return "tonga";
+ case CHIP_ICELAND: return "iceland";
+ case CHIP_CARRIZO: return "carrizo";
+ case CHIP_FIJI: return "fiji";
default: return "";
}
}
@@ -636,6 +681,12 @@ static int r600_get_compute_param(struct pipe_screen *screen,
return sizeof(uint32_t);
case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
break; /* unused */
+ case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+ if (ret) {
+ uint32_t *subgroup_size = ret;
+ *subgroup_size = r600_wavefront_size(rscreen->family);
+ }
+ return sizeof(uint32_t);
}
fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
@@ -656,25 +707,33 @@ static int r600_get_driver_query_info(struct pipe_screen *screen,
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
struct pipe_driver_query_info list[] = {
+ {"num-compilations", R600_QUERY_NUM_COMPILATIONS, {0}, PIPE_DRIVER_QUERY_TYPE_UINT64,
+ PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
+ {"num-shaders-created", R600_QUERY_NUM_SHADERS_CREATED, {0}, PIPE_DRIVER_QUERY_TYPE_UINT64,
+ PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
{"draw-calls", R600_QUERY_DRAW_CALLS, {0}},
{"requested-VRAM", R600_QUERY_REQUESTED_VRAM, {rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
{"requested-GTT", R600_QUERY_REQUESTED_GTT, {rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
- {"buffer-wait-time", R600_QUERY_BUFFER_WAIT_TIME, {0}},
+ {"buffer-wait-time", R600_QUERY_BUFFER_WAIT_TIME, {0}, PIPE_DRIVER_QUERY_TYPE_MICROSECONDS,
+ PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
{"num-cs-flushes", R600_QUERY_NUM_CS_FLUSHES, {0}},
- {"num-bytes-moved", R600_QUERY_NUM_BYTES_MOVED, {0}, PIPE_DRIVER_QUERY_TYPE_BYTES},
+ {"num-bytes-moved", R600_QUERY_NUM_BYTES_MOVED, {0}, PIPE_DRIVER_QUERY_TYPE_BYTES,
+ PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
{"VRAM-usage", R600_QUERY_VRAM_USAGE, {rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
{"GTT-usage", R600_QUERY_GTT_USAGE, {rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
+ {"GPU-load", R600_QUERY_GPU_LOAD, {100}},
{"temperature", R600_QUERY_GPU_TEMPERATURE, {100}},
- {"shader-clock", R600_QUERY_CURRENT_GPU_SCLK, {0}},
- {"memory-clock", R600_QUERY_CURRENT_GPU_MCLK, {0}},
- {"GPU-load", R600_QUERY_GPU_LOAD, {100}}
+ {"shader-clock", R600_QUERY_CURRENT_GPU_SCLK, {0}, PIPE_DRIVER_QUERY_TYPE_HZ},
+ {"memory-clock", R600_QUERY_CURRENT_GPU_MCLK, {0}, PIPE_DRIVER_QUERY_TYPE_HZ},
};
unsigned num_queries;
if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42)
num_queries = Elements(list);
+ else if (rscreen->info.drm_major == 3)
+ num_queries = Elements(list) - 3;
else
- num_queries = 8;
+ num_queries = Elements(list) - 4;
if (!info)
return num_queries;
@@ -695,14 +754,6 @@ static void r600_fence_reference(struct pipe_screen *screen,
rws->fence_reference(ptr, fence);
}
-static boolean r600_fence_signalled(struct pipe_screen *screen,
- struct pipe_fence_handle *fence)
-{
- struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws;
-
- return rws->fence_wait(rws, fence, 0);
-}
-
static boolean r600_fence_finish(struct pipe_screen *screen,
struct pipe_fence_handle *fence,
uint64_t timeout)
@@ -837,8 +888,22 @@ struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
bool r600_common_screen_init(struct r600_common_screen *rscreen,
struct radeon_winsys *ws)
{
+ char llvm_string[32] = {};
+
ws->query_info(ws, &rscreen->info);
+#if HAVE_LLVM
+ snprintf(llvm_string, sizeof(llvm_string),
+ ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff,
+ HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
+#endif
+
+ snprintf(rscreen->renderer_string, sizeof(rscreen->renderer_string),
+ "%s (DRM %i.%i.%i%s)",
+ r600_get_chip_name(rscreen), rscreen->info.drm_major,
+ rscreen->info.drm_minor, rscreen->info.drm_patchlevel,
+ llvm_string);
+
rscreen->b.get_name = r600_get_name;
rscreen->b.get_vendor = r600_get_vendor;
rscreen->b.get_device_vendor = r600_get_device_vendor;
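
[Editor's note] With this change the GL renderer string is assembled once at screen init and get_name() simply returns it; given the format string above it comes out as, for example, "AMD TONGA (DRM 3.0.0, LLVM 3.6.1)" (versions illustrative). Baking the DRM and LLVM versions into the string makes bug reports self-describing.
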
@@ -848,7 +913,6 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
rscreen->b.get_timestamp = r600_get_timestamp;
rscreen->b.fence_finish = r600_fence_finish;
rscreen->b.fence_reference = r600_fence_reference;
- rscreen->b.fence_signalled = r600_fence_signalled;
rscreen->b.resource_destroy = u_resource_destroy_vtbl;
rscreen->b.resource_from_user_memory = r600_buffer_from_user_memory;
@@ -874,7 +938,9 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
pipe_mutex_init(rscreen->aux_context_lock);
pipe_mutex_init(rscreen->gpu_load_mutex);
- if (rscreen->info.drm_minor >= 28 && (rscreen->debug_flags & DBG_TRACE_CS)) {
+ if (((rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 28) ||
+ rscreen->info.drm_major == 3) &&
+ (rscreen->debug_flags & DBG_TRACE_CS)) {
rscreen->trace_bo = (struct r600_resource*)pipe_buffer_create(&rscreen->b,
PIPE_BIND_CUSTOM,
PIPE_USAGE_STAGING,
@@ -922,10 +988,8 @@ void r600_destroy_common_screen(struct r600_common_screen *rscreen)
pipe_mutex_destroy(rscreen->aux_context_lock);
rscreen->aux_context->destroy(rscreen->aux_context);
- if (rscreen->trace_bo) {
- rscreen->ws->buffer_unmap(rscreen->trace_bo->cs_buf);
+ if (rscreen->trace_bo)
pipe_resource_reference((struct pipe_resource**)&rscreen->trace_bo, NULL);
- }
rscreen->ws->destroy(rscreen->ws);
FREE(rscreen);
@@ -941,6 +1005,10 @@ bool r600_can_dump_shader(struct r600_common_screen *rscreen,
switch (tgsi_get_processor_type(tokens)) {
case TGSI_PROCESSOR_VERTEX:
return (rscreen->debug_flags & DBG_VS) != 0;
+ case TGSI_PROCESSOR_TESS_CTRL:
+ return (rscreen->debug_flags & DBG_TCS) != 0;
+ case TGSI_PROCESSOR_TESS_EVAL:
+ return (rscreen->debug_flags & DBG_TES) != 0;
case TGSI_PROCESSOR_GEOMETRY:
return (rscreen->debug_flags & DBG_GS) != 0;
case TGSI_PROCESSOR_FRAGMENT:
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 6ce81d33ddd..29db1cc4e07 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -59,6 +59,8 @@
#define R600_QUERY_CURRENT_GPU_SCLK (PIPE_QUERY_DRIVER_SPECIFIC + 9)
#define R600_QUERY_CURRENT_GPU_MCLK (PIPE_QUERY_DRIVER_SPECIFIC + 10)
#define R600_QUERY_GPU_LOAD (PIPE_QUERY_DRIVER_SPECIFIC + 11)
+#define R600_QUERY_NUM_COMPILATIONS (PIPE_QUERY_DRIVER_SPECIFIC + 12)
+#define R600_QUERY_NUM_SHADERS_CREATED (PIPE_QUERY_DRIVER_SPECIFIC + 13)
#define R600_CONTEXT_STREAMOUT_FLUSH (1u << 0)
#define R600_CONTEXT_PRIVATE_FLAG (1u << 1)
@@ -79,17 +81,23 @@
#define DBG_GS (1 << 7)
#define DBG_PS (1 << 8)
#define DBG_CS (1 << 9)
+#define DBG_TCS (1 << 10)
+#define DBG_TES (1 << 11)
+#define DBG_NO_IR (1 << 12)
+#define DBG_NO_TGSI (1 << 13)
+#define DBG_NO_ASM (1 << 14)
+/* Bits 21-31 are reserved for the r600g driver. */
/* features */
-#define DBG_NO_ASYNC_DMA (1 << 10)
-#define DBG_NO_HYPERZ (1 << 11)
-#define DBG_NO_DISCARD_RANGE (1 << 12)
-#define DBG_NO_2D_TILING (1 << 13)
-#define DBG_NO_TILING (1 << 14)
-#define DBG_SWITCH_ON_EOP (1 << 15)
-#define DBG_FORCE_DMA (1 << 16)
-#define DBG_PRECOMPILE (1 << 17)
-#define DBG_INFO (1 << 18)
-/* The maximum allowed bit is 20. */
+#define DBG_NO_ASYNC_DMA (1llu << 32)
+#define DBG_NO_HYPERZ (1llu << 33)
+#define DBG_NO_DISCARD_RANGE (1llu << 34)
+#define DBG_NO_2D_TILING (1llu << 35)
+#define DBG_NO_TILING (1llu << 36)
+#define DBG_SWITCH_ON_EOP (1llu << 37)
+#define DBG_FORCE_DMA (1llu << 38)
+#define DBG_PRECOMPILE (1llu << 39)
+#define DBG_INFO (1llu << 40)
+#define DBG_NO_WC (1llu << 41)
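
[Editor's note] The feature flags move from bits 10-18 to bits 32 and up, freeing the low word for shader-dump flags (bits 21-31 stay reserved for r600g proper). The 1llu suffix is load-bearing: a plain 1 << 41 would shift a 32-bit int past its width, which is undefined behavior. Correspondingly, debug_flags widens to uint64_t below. A minimal check sketch under those assumptions:

#include <stdbool.h>
#include <stdint.h>

#define DBG_NO_WC (1llu << 41)          /* 64-bit literal, as in the diff */

static bool wc_disabled(uint64_t debug_flags)
{
	return (debug_flags & DBG_NO_WC) != 0;
}
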
#define R600_MAP_BUFFER_ALIGNMENT 64
@@ -127,9 +135,8 @@ struct radeon_shader_binary {
struct radeon_shader_reloc *relocs;
unsigned reloc_count;
- /** Set to 1 if the disassembly for this binary has been dumped to
- * stderr. */
- int disassembled;
+ /** Disassembled shader in a string. */
+ char *disasm_string;
};
struct r600_resource {
@@ -214,7 +221,6 @@ struct r600_texture {
float depth_clear_value;
bool non_disp_tiling; /* R600-Cayman only */
- unsigned mipmap_shift;
};
struct r600_surface {
@@ -236,6 +242,7 @@ struct r600_surface {
unsigned cb_color_pitch; /* EG and later */
unsigned cb_color_slice; /* EG and later */
unsigned cb_color_attrib; /* EG and later */
+ unsigned cb_dcc_control; /* VI and later */
unsigned cb_color_fmask; /* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */
unsigned cb_color_fmask_slice; /* EG and later */
unsigned cb_color_cmask; /* CB_COLORn_TILE (r600 only) */
@@ -272,7 +279,7 @@ struct r600_common_screen {
enum chip_class chip_class;
struct radeon_info info;
struct r600_tiling_info tiling_info;
- unsigned debug_flags;
+ uint64_t debug_flags;
bool has_cp_dma;
bool has_streamout;
@@ -285,12 +292,23 @@ struct r600_common_screen {
uint32_t *trace_ptr;
unsigned cs_count;
+ /* This must be in the screen, because UE4 uses one context for
+ * compilation and another one for rendering.
+ */
+ unsigned num_compilations;
+ /* Along with ST_DEBUG=precompile, this should show if applications
+ * are loading shaders on demand. This is a monotonic counter.
+ */
+ unsigned num_shaders_created;
+
/* GPU load thread. */
pipe_mutex gpu_load_mutex;
pipe_thread gpu_load_thread;
unsigned gpu_load_counter_busy;
unsigned gpu_load_counter_idle;
- unsigned gpu_load_stop_thread; /* bool */
+ volatile unsigned gpu_load_stop_thread; /* bool */
+
+ char renderer_string[64];
};
/* This encapsulates a state or an operation which can emitted into the GPU
@@ -298,6 +316,7 @@ struct r600_common_screen {
struct r600_atom {
void (*emit)(struct r600_common_context *ctx, struct r600_atom *state);
unsigned num_dw;
+ unsigned short id; /* used by r600 only */
bool dirty;
};
@@ -327,6 +346,10 @@ struct r600_streamout {
/* External state which comes from the vertex shader,
* it must be set explicitly when binding a shader. */
unsigned *stride_in_dw;
+ unsigned enabled_stream_buffers_mask; /* stream 0 buffers 0-3 in the 4 LSBs */
+
+ /* The state of VGT_STRMOUT_BUFFER_(CONFIG|EN). */
+ unsigned hw_enabled_mask;
/* The state of VGT_STRMOUT_(CONFIG|EN). */
struct r600_atom enable_atom;
@@ -352,10 +375,12 @@ struct r600_common_context {
struct r600_common_screen *screen;
struct radeon_winsys *ws;
+ struct radeon_winsys_ctx *ctx;
enum radeon_family family;
enum chip_class chip_class;
struct r600_rings rings;
unsigned initial_gfx_cs_size;
+ unsigned gpu_reset_counter;
struct u_upload_mgr *uploader;
struct u_suballocator *allocator_so_filled_size;
@@ -376,11 +401,14 @@ struct r600_common_context {
int num_occlusion_queries;
/* Keep track of non-timer queries, because they should be suspended
* during context flushing.
- * The timer queries (TIME_ELAPSED) shouldn't be suspended. */
+ * The timer queries (TIME_ELAPSED) shouldn't be suspended for blits,
+ * but they should be suspended between IBs. */
struct list_head active_nontimer_queries;
+ struct list_head active_timer_queries;
unsigned num_cs_dw_nontimer_queries_suspend;
+ unsigned num_cs_dw_timer_queries_suspend;
/* If queries have been suspended. */
- bool nontimer_queries_suspended;
+ bool queries_suspended_for_flush;
/* Additional hardware info. */
unsigned backend_mask;
unsigned max_db; /* for OQ */
@@ -441,6 +469,9 @@ struct r600_common_context {
/* This ensures there is enough space in the command stream. */
void (*need_gfx_cs_space)(struct pipe_context *ctx, unsigned num_dw,
bool include_draw_vbo);
+
+ void (*set_atom_dirty)(struct r600_common_context *ctx,
+ struct r600_atom *atom, bool dirty);
};
/* r600_buffer.c */
@@ -495,6 +526,8 @@ unsigned r600_gpu_load_end(struct r600_common_screen *rscreen, uint64_t begin);
void r600_query_init(struct r600_common_context *rctx);
void r600_suspend_nontimer_queries(struct r600_common_context *ctx);
void r600_resume_nontimer_queries(struct r600_common_context *ctx);
+void r600_suspend_timer_queries(struct r600_common_context *ctx);
+void r600_resume_timer_queries(struct r600_common_context *ctx);
void r600_query_init_backend_mask(struct r600_common_context *ctx);
/* r600_streamout.c */
@@ -549,12 +582,12 @@ void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
/* Inline helpers. */
-static INLINE struct r600_resource *r600_resource(struct pipe_resource *r)
+static inline struct r600_resource *r600_resource(struct pipe_resource *r)
{
return (struct r600_resource*)r;
}
-static INLINE void
+static inline void
r600_resource_reference(struct r600_resource **ptr, struct r600_resource *res)
{
pipe_resource_reference((struct pipe_resource **)ptr,
@@ -570,6 +603,26 @@ static inline unsigned r600_tex_aniso_filter(unsigned filter)
/* else */ return 4;
}
+static inline unsigned r600_wavefront_size(enum radeon_family family)
+{
+ switch (family) {
+ case CHIP_RV610:
+ case CHIP_RS780:
+ case CHIP_RV620:
+ case CHIP_RS880:
+ return 16;
+ case CHIP_RV630:
+ case CHIP_RV635:
+ case CHIP_RV730:
+ case CHIP_RV710:
+ case CHIP_PALM:
+ case CHIP_CEDAR:
+ return 32;
+ default:
+ return 64;
+ }
+}
+
#define COMPUTE_DBG(rscreen, fmt, args...) \
do { \
if ((rscreen->b.debug_flags & DBG_COMPUTE)) fprintf(stderr, fmt, ##args); \
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 71f4a1522f9..7057aa19a7c 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -54,6 +54,8 @@ struct r600_query {
uint64_t end_result;
/* Fence for GPU_FINISHED. */
struct pipe_fence_handle *fence;
+ /* For transform feedback: which stream the query is for */
+ unsigned stream;
};
@@ -90,6 +92,8 @@ static struct r600_resource *r600_new_query_buffer(struct r600_common_context *c
case R600_QUERY_CURRENT_GPU_SCLK:
case R600_QUERY_CURRENT_GPU_MCLK:
case R600_QUERY_GPU_LOAD:
+ case R600_QUERY_NUM_COMPILATIONS:
+ case R600_QUERY_NUM_SHADERS_CREATED:
return NULL;
}
@@ -118,7 +122,6 @@ static struct r600_resource *r600_new_query_buffer(struct r600_common_context *c
}
results += 4 * ctx->max_db;
}
- ctx->ws->buffer_unmap(buf->cs_buf);
break;
case PIPE_QUERY_TIME_ELAPSED:
case PIPE_QUERY_TIMESTAMP:
@@ -130,7 +133,6 @@ static struct r600_resource *r600_new_query_buffer(struct r600_common_context *c
case PIPE_QUERY_PIPELINE_STATISTICS:
results = r600_buffer_map_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE);
memset(results, 0, buf_size);
- ctx->ws->buffer_unmap(buf->cs_buf);
break;
default:
assert(0);
@@ -157,6 +159,17 @@ static void r600_update_occlusion_query_state(struct r600_common_context *rctx,
}
}
+static unsigned event_type_for_stream(struct r600_query *query)
+{
+ switch (query->stream) {
+ default:
+ case 0: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS;
+ case 1: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS1;
+ case 2: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS2;
+ case 3: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS3;
+ }
+}
+
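[Editor's note] Each streamout-statistics query now remembers its target stream (set from the create_query index below), and begin/end pick the matching SAMPLE_STREAMOUTSTATS{,1,2,3} event; unrecognized streams deliberately fall through to stream 0. A hypothetical caller selecting stream 1:

/* The third argument of pipe->create_query() is the query index, which
 * for SO statistics selects the transform-feedback stream (0-3). */
struct pipe_query *q =
	pipe->create_query(pipe, PIPE_QUERY_SO_STATISTICS, /* index = */ 1);
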
static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_query *query)
{
struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
@@ -191,7 +204,7 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
- radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3));
+ radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3));
radeon_emit(cs, va);
radeon_emit(cs, (va >> 32UL) & 0xFF);
break;
@@ -215,9 +228,10 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
r600_emit_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE,
RADEON_PRIO_MIN);
- if (!r600_is_timer_query(query->type)) {
+ if (r600_is_timer_query(query->type))
+ ctx->num_cs_dw_timer_queries_suspend += query->num_cs_dw;
+ else
ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw;
- }
}
static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_query *query)
@@ -248,7 +262,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
va += query->buffer.results_end + query->result_size/2;
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
- radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3));
+ radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3));
radeon_emit(cs, va);
radeon_emit(cs, (va >> 32UL) & 0xFF);
break;
@@ -279,9 +293,10 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
query->buffer.results_end += query->result_size;
if (r600_query_needs_begin(query->type)) {
- if (!r600_is_timer_query(query->type)) {
+ if (r600_is_timer_query(query->type))
+ ctx->num_cs_dw_timer_queries_suspend -= query->num_cs_dw;
+ else
ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw;
- }
}
r600_update_occlusion_query_state(ctx, query->type, -1);
@@ -292,6 +307,13 @@ static void r600_emit_query_predication(struct r600_common_context *ctx, struct
int operation, bool flag_wait)
{
struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
+ uint32_t op = PRED_OP(operation);
+
+ /* If true then invert, see GL_ARB_conditional_render_inverted */
+ if (ctx->current_render_cond_cond)
+ op |= PREDICATION_DRAW_NOT_VISIBLE; /* Draw if not visible/overflow */
+ else
+ op |= PREDICATION_DRAW_VISIBLE; /* Draw if visible/overflow */
if (operation == PREDICATION_OP_CLEAR) {
ctx->need_gfx_cs_space(&ctx->b, 3, FALSE);
@@ -302,24 +324,21 @@ static void r600_emit_query_predication(struct r600_common_context *ctx, struct
} else {
struct r600_query_buffer *qbuf;
unsigned count;
- uint32_t op;
-
/* Find how many results there are. */
count = 0;
for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
count += qbuf->results_end / query->result_size;
}
-
+
ctx->need_gfx_cs_space(&ctx->b, 5 * count, TRUE);
-
- op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE |
- (flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW);
-
+
+ op |= flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW;
+
/* emit predicate packets for all data blocks */
for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
unsigned results_base = 0;
uint64_t va = qbuf->buf->gpu_address;
-
+
while (results_base < qbuf->results_end) {
radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
radeon_emit(cs, (va + results_base) & 0xFFFFFFFFUL);
@@ -327,7 +346,7 @@ static void r600_emit_query_predication(struct r600_common_context *ctx, struct
r600_emit_reloc(ctx, &ctx->rings.gfx, qbuf->buf, RADEON_USAGE_READ,
RADEON_PRIO_MIN);
results_base += query->result_size;
-
+
/* set CONTINUE bit for all packets except the first */
op |= PREDICATION_CONTINUE;
}
@@ -369,6 +388,7 @@ static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned q
/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
query->result_size = 32;
query->num_cs_dw = 6;
+ query->stream = index;
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
/* 11 values on EG, 8 on R600. */
@@ -390,6 +410,8 @@ static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned q
case R600_QUERY_CURRENT_GPU_SCLK:
case R600_QUERY_CURRENT_GPU_MCLK:
case R600_QUERY_GPU_LOAD:
+ case R600_QUERY_NUM_COMPILATIONS:
+ case R600_QUERY_NUM_SHADERS_CREATED:
skip_allocation = true;
break;
default:
@@ -454,7 +476,7 @@ static boolean r600_begin_query(struct pipe_context *ctx,
rquery->begin_result = 0;
return true;
case R600_QUERY_BUFFER_WAIT_TIME:
- rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS);
+ rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS) / 1000;
return true;
case R600_QUERY_NUM_CS_FLUSHES:
rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_CS_FLUSHES);
@@ -465,6 +487,12 @@ static boolean r600_begin_query(struct pipe_context *ctx,
case R600_QUERY_GPU_LOAD:
rquery->begin_result = r600_gpu_load_begin(rctx->screen);
return true;
+ case R600_QUERY_NUM_COMPILATIONS:
+ rquery->begin_result = p_atomic_read(&rctx->screen->num_compilations);
+ return true;
+ case R600_QUERY_NUM_SHADERS_CREATED:
+ rquery->begin_result = p_atomic_read(&rctx->screen->num_shaders_created);
+ return true;
}
/* Discard the old query buffers. */
@@ -477,7 +505,7 @@ static boolean r600_begin_query(struct pipe_context *ctx,
/* Obtain a new buffer if the current one can't be mapped without a stall. */
if (r600_rings_is_buffer_referenced(rctx, rquery->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) ||
- rctx->ws->buffer_is_busy(rquery->buffer.buf->buf, RADEON_USAGE_READWRITE)) {
+ !rctx->ws->buffer_wait(rquery->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
rquery->buffer.buf = r600_new_query_buffer(rctx, rquery->type);
}
@@ -487,9 +515,10 @@ static boolean r600_begin_query(struct pipe_context *ctx,
r600_emit_query_begin(rctx, rquery);
- if (!r600_is_timer_query(rquery->type)) {
+ if (r600_is_timer_query(rquery->type))
+ LIST_ADDTAIL(&rquery->list, &rctx->active_timer_queries);
+ else
LIST_ADDTAIL(&rquery->list, &rctx->active_nontimer_queries);
- }
return true;
}
@@ -515,7 +544,7 @@ static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_REQUESTED_GTT_MEMORY);
return;
case R600_QUERY_BUFFER_WAIT_TIME:
- rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS);
+ rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS) / 1000;
return;
case R600_QUERY_NUM_CS_FLUSHES:
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_CS_FLUSHES);
@@ -541,13 +570,18 @@ static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
case R600_QUERY_GPU_LOAD:
rquery->end_result = r600_gpu_load_end(rctx->screen, rquery->begin_result);
return;
+ case R600_QUERY_NUM_COMPILATIONS:
+ rquery->end_result = p_atomic_read(&rctx->screen->num_compilations);
+ return;
+ case R600_QUERY_NUM_SHADERS_CREATED:
+ rquery->end_result = p_atomic_read(&rctx->screen->num_shaders_created);
+ return;
}
r600_emit_query_end(rctx, rquery);
- if (r600_query_needs_begin(rquery->type) && !r600_is_timer_query(rquery->type)) {
+ if (r600_query_needs_begin(rquery->type))
LIST_DELINIT(&rquery->list);
- }
}
static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned end_index,
@@ -601,6 +635,8 @@ static boolean r600_get_query_buffer_result(struct r600_common_context *ctx,
case R600_QUERY_GPU_TEMPERATURE:
case R600_QUERY_CURRENT_GPU_SCLK:
case R600_QUERY_CURRENT_GPU_MCLK:
+ case R600_QUERY_NUM_COMPILATIONS:
+ case R600_QUERY_NUM_SHADERS_CREATED:
result->u64 = query->end_result - query->begin_result;
return TRUE;
case R600_QUERY_GPU_LOAD:
@@ -751,7 +787,6 @@ static boolean r600_get_query_buffer_result(struct r600_common_context *ctx,
assert(0);
}
- ctx->ws->buffer_unmap(qbuf->buf->cs_buf);
return TRUE;
}
@@ -823,22 +858,37 @@ static void r600_render_condition(struct pipe_context *ctx,
}
}
-void r600_suspend_nontimer_queries(struct r600_common_context *ctx)
+static void r600_suspend_queries(struct r600_common_context *ctx,
+ struct list_head *query_list,
+ unsigned *num_cs_dw_queries_suspend)
{
struct r600_query *query;
- LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_queries, list) {
+ LIST_FOR_EACH_ENTRY(query, query_list, list) {
r600_emit_query_end(ctx, query);
}
- assert(ctx->num_cs_dw_nontimer_queries_suspend == 0);
+ assert(*num_cs_dw_queries_suspend == 0);
+}
+
+void r600_suspend_nontimer_queries(struct r600_common_context *ctx)
+{
+ r600_suspend_queries(ctx, &ctx->active_nontimer_queries,
+ &ctx->num_cs_dw_nontimer_queries_suspend);
+}
+
+void r600_suspend_timer_queries(struct r600_common_context *ctx)
+{
+ r600_suspend_queries(ctx, &ctx->active_timer_queries,
+ &ctx->num_cs_dw_timer_queries_suspend);
}
-static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *ctx)
+static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *ctx,
+ struct list_head *query_list)
{
struct r600_query *query;
unsigned num_dw = 0;
- LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_queries, list) {
+ LIST_FOR_EACH_ENTRY(query, query_list, list) {
/* begin + end */
num_dw += query->num_cs_dw * 2;
@@ -857,21 +907,35 @@ static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *
return num_dw;
}
-void r600_resume_nontimer_queries(struct r600_common_context *ctx)
+static void r600_resume_queries(struct r600_common_context *ctx,
+ struct list_head *query_list,
+ unsigned *num_cs_dw_queries_suspend)
{
struct r600_query *query;
+ unsigned num_cs_dw = r600_queries_num_cs_dw_for_resuming(ctx, query_list);
- assert(ctx->num_cs_dw_nontimer_queries_suspend == 0);
+ assert(*num_cs_dw_queries_suspend == 0);
/* Check CS space here. Resuming must not be interrupted by flushes. */
- ctx->need_gfx_cs_space(&ctx->b,
- r600_queries_num_cs_dw_for_resuming(ctx), TRUE);
+ ctx->need_gfx_cs_space(&ctx->b, num_cs_dw, TRUE);
- LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_queries, list) {
+ LIST_FOR_EACH_ENTRY(query, query_list, list) {
r600_emit_query_begin(ctx, query);
}
}
+void r600_resume_nontimer_queries(struct r600_common_context *ctx)
+{
+ r600_resume_queries(ctx, &ctx->active_nontimer_queries,
+ &ctx->num_cs_dw_nontimer_queries_suspend);
+}
+
+void r600_resume_timer_queries(struct r600_common_context *ctx)
+{
+ r600_resume_queries(ctx, &ctx->active_timer_queries,
+ &ctx->num_cs_dw_timer_queries_suspend);
+}
+
/* Get backends mask */
void r600_query_init_backend_mask(struct r600_common_context *ctx)
{
@@ -919,7 +983,6 @@ void r600_query_init_backend_mask(struct r600_common_context *ctx)
results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_WRITE);
if (results) {
memset(results, 0, ctx->max_db * 4 * 4);
- ctx->ws->buffer_unmap(buffer->cs_buf);
/* emit EVENT_WRITE for ZPASS_DONE */
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
@@ -937,7 +1000,6 @@ void r600_query_init_backend_mask(struct r600_common_context *ctx)
if (results[i*4 + 1])
mask |= (1<<i);
}
- ctx->ws->buffer_unmap(buffer->cs_buf);
}
}
@@ -966,4 +1028,5 @@ void r600_query_init(struct r600_common_context *rctx)
rctx->b.render_condition = r600_render_condition;
LIST_INITHEAD(&rctx->active_nontimer_queries);
+ LIST_INITHEAD(&rctx->active_timer_queries);
}
diff --git a/src/gallium/drivers/radeon/r600_streamout.c b/src/gallium/drivers/radeon/r600_streamout.c
index bc8bf97ef89..0853f636a27 100644
--- a/src/gallium/drivers/radeon/r600_streamout.c
+++ b/src/gallium/drivers/radeon/r600_streamout.c
@@ -88,8 +88,7 @@ void r600_streamout_buffers_dirty(struct r600_common_context *rctx)
12 + /* flush_vgt_streamout */
num_bufs * 11; /* STRMOUT_BUFFER_UPDATE, BUFFER_SIZE */
- begin->num_dw = 12 + /* flush_vgt_streamout */
- 3; /* VGT_STRMOUT_BUFFER_CONFIG */
+ begin->num_dw = 12; /* flush_vgt_streamout */
if (rctx->chip_class >= SI) {
begin->num_dw += num_bufs * 4; /* SET_CONTEXT_REG */
@@ -105,7 +104,7 @@ void r600_streamout_buffers_dirty(struct r600_common_context *rctx)
(num_bufs - num_bufs_appended) * 6 + /* STRMOUT_BUFFER_UPDATE */
(rctx->family > CHIP_R600 && rctx->family < CHIP_RS780 ? 2 : 0); /* SURFACE_BASE_UPDATE */
- begin->dirty = true;
+ rctx->set_atom_dirty(rctx, begin, true);
r600_set_streamout_enable(rctx, true);
}
@@ -146,7 +145,7 @@ void r600_set_streamout_targets(struct pipe_context *ctx,
if (num_targets) {
r600_streamout_buffers_dirty(rctx);
} else {
- rctx->streamout.begin_atom.dirty = false;
+ rctx->set_atom_dirty(rctx, &rctx->streamout.begin_atom, false);
r600_set_streamout_enable(rctx, false);
}
}
@@ -192,11 +191,6 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r
r600_flush_vgt_streamout(rctx);
- r600_write_context_reg(cs, rctx->chip_class >= EVERGREEN ?
- R_028B98_VGT_STRMOUT_BUFFER_CONFIG :
- R_028B20_VGT_STRMOUT_BUFFER_EN,
- rctx->streamout.enabled_mask);
-
for (i = 0; i < rctx->streamout.num_targets; i++) {
if (!t[i])
continue;
@@ -326,20 +320,42 @@ static bool r600_get_strmout_en(struct r600_common_context *rctx)
static void r600_emit_streamout_enable(struct r600_common_context *rctx,
struct r600_atom *atom)
{
- r600_write_context_reg(rctx->rings.gfx.cs,
- rctx->chip_class >= EVERGREEN ?
- R_028B94_VGT_STRMOUT_CONFIG :
- R_028AB0_VGT_STRMOUT_EN,
- S_028B94_STREAMOUT_0_EN(r600_get_strmout_en(rctx)));
+ unsigned strmout_config_reg = R_028AB0_VGT_STRMOUT_EN;
+ unsigned strmout_config_val = S_028B94_STREAMOUT_0_EN(r600_get_strmout_en(rctx));
+ unsigned strmout_buffer_reg = R_028B20_VGT_STRMOUT_BUFFER_EN;
+ unsigned strmout_buffer_val = rctx->streamout.hw_enabled_mask &
+ rctx->streamout.enabled_stream_buffers_mask;
+
+ if (rctx->chip_class >= EVERGREEN) {
+ strmout_buffer_reg = R_028B98_VGT_STRMOUT_BUFFER_CONFIG;
+
+ strmout_config_reg = R_028B94_VGT_STRMOUT_CONFIG;
+ strmout_config_val |=
+ S_028B94_RAST_STREAM(0) |
+ S_028B94_STREAMOUT_1_EN(r600_get_strmout_en(rctx)) |
+ S_028B94_STREAMOUT_2_EN(r600_get_strmout_en(rctx)) |
+ S_028B94_STREAMOUT_3_EN(r600_get_strmout_en(rctx));
+ }
+ r600_write_context_reg(rctx->rings.gfx.cs, strmout_buffer_reg, strmout_buffer_val);
+ r600_write_context_reg(rctx->rings.gfx.cs, strmout_config_reg, strmout_config_val);
}
static void r600_set_streamout_enable(struct r600_common_context *rctx, bool enable)
{
bool old_strmout_en = r600_get_strmout_en(rctx);
+ unsigned old_hw_enabled_mask = rctx->streamout.hw_enabled_mask;
rctx->streamout.streamout_enabled = enable;
- if (old_strmout_en != r600_get_strmout_en(rctx))
- rctx->streamout.enable_atom.dirty = true;
+
+ rctx->streamout.hw_enabled_mask = rctx->streamout.enabled_mask |
+ (rctx->streamout.enabled_mask << 4) |
+ (rctx->streamout.enabled_mask << 8) |
+ (rctx->streamout.enabled_mask << 12);
+
+ if ((old_strmout_en != r600_get_strmout_en(rctx)) ||
+ (old_hw_enabled_mask != rctx->streamout.hw_enabled_mask)) {
+ rctx->set_atom_dirty(rctx, &rctx->streamout.enable_atom, true);
+ }
}
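/* A small worked example of the mask replication above: enabled_mask = 0x3
 * (buffers 0-1) yields
 *   0x3 | 0x3 << 4 | 0x3 << 8 | 0x3 << 12 == 0x3333,
 * i.e. one copy of the buffer mask per stream; r600_emit_streamout_enable
 * then ANDs this with enabled_stream_buffers_mask before writing
 * VGT_STRMOUT_BUFFER_CONFIG.
 */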
void r600_update_prims_generated_query_state(struct r600_common_context *rctx,
@@ -354,8 +370,9 @@ void r600_update_prims_generated_query_state(struct r600_common_context *rctx,
rctx->streamout.prims_gen_query_enabled =
rctx->streamout.num_prims_gen_queries != 0;
- if (old_strmout_en != r600_get_strmout_en(rctx))
- rctx->streamout.enable_atom.dirty = true;
+ if (old_strmout_en != r600_get_strmout_en(rctx)) {
+ rctx->set_atom_dirty(rctx, &rctx->streamout.enable_atom, true);
+ }
}
}
@@ -365,5 +382,5 @@ void r600_streamout_init(struct r600_common_context *rctx)
rctx->b.stream_output_target_destroy = r600_so_target_destroy;
rctx->streamout.begin_atom.emit = r600_emit_streamout_begin;
rctx->streamout.enable_atom.emit = r600_emit_streamout_enable;
- rctx->streamout.enable_atom.num_dw = 3;
+ rctx->streamout.enable_atom.num_dw = 6;
}
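/* Note: num_dw grows from 3 to 6 because the enable atom now performs two
 * SET_CONTEXT_REG writes (buffer config and streamout config), each taking
 * 3 dwords, where it previously emitted a single register write.
 */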
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index dc510c99749..54696910e43 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -243,10 +243,11 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen,
RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR,
surface->level[0].mode >= RADEON_SURF_MODE_2D ?
RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR,
+ surface->pipe_config,
surface->bankw, surface->bankh,
surface->tile_split,
surface->stencil_tile_split,
- surface->mtilea,
+ surface->mtilea, surface->num_banks,
surface->level[0].pitch_bytes,
(surface->flags & RADEON_SURF_SCANOUT) != 0);
@@ -489,7 +490,7 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
unsigned num_pipes = rscreen->tiling_info.num_channels;
if (rscreen->chip_class <= EVERGREEN &&
- rscreen->info.drm_minor < 26)
+ rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 26)
return 0;
/* HW bug on R6xx. */
@@ -501,7 +502,7 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
/* HTILE is broken with 1D tiling on old kernels and CIK. */
if (rscreen->chip_class >= CIK &&
rtex->surface.level[0].mode == RADEON_SURF_MODE_1D &&
- rscreen->info.drm_minor < 38)
+ rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 38)
return 0;
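/* Note: the drm_major == 2 guards added here should restrict these
 * minor-version checks to the radeon kernel driver; the amdgpu driver
 * carries its own 3.x versioning, so its minors must not be compared
 * against these thresholds.
 */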
switch (num_pipes) {
@@ -706,6 +707,7 @@ static unsigned r600_choose_tiling(struct r600_common_screen *rscreen,
const struct pipe_resource *templ)
{
const struct util_format_description *desc = util_format_description(templ->format);
+ bool force_tiling = templ->flags & R600_RESOURCE_FLAG_FORCE_TILING;
/* MSAA resources must be 2D tiled. */
if (templ->nr_samples > 1)
@@ -715,10 +717,16 @@ static unsigned r600_choose_tiling(struct r600_common_screen *rscreen,
if (templ->flags & R600_RESOURCE_FLAG_TRANSFER)
return RADEON_SURF_MODE_LINEAR_ALIGNED;
+ /* r600g: force tiling on TEXTURE_2D and TEXTURE_3D compute resources. */
+ if (rscreen->chip_class >= R600 && rscreen->chip_class <= CAYMAN &&
+ (templ->bind & PIPE_BIND_COMPUTE_RESOURCE) &&
+ (templ->target == PIPE_TEXTURE_2D ||
+ templ->target == PIPE_TEXTURE_3D))
+ force_tiling = true;
+
/* Handle common candidates for the linear mode.
* Compressed textures must always be tiled. */
- if (!(templ->flags & R600_RESOURCE_FLAG_FORCE_TILING) &&
- !util_format_is_compressed(templ->format)) {
+ if (!force_tiling && !util_format_is_compressed(templ->format)) {
/* Not everything can be linear, so we cannot enforce it
* for all textures. */
if ((rscreen->debug_flags & DBG_NO_TILING) &&
@@ -934,7 +942,7 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
use_staging_texture = TRUE;
} else if (!(usage & PIPE_TRANSFER_READ) &&
(r600_rings_is_buffer_referenced(rctx, rtex->resource.cs_buf, RADEON_USAGE_READWRITE) ||
- rctx->ws->buffer_is_busy(rtex->resource.buf, RADEON_USAGE_READWRITE))) {
+ !rctx->ws->buffer_wait(rtex->resource.buf, 0, RADEON_USAGE_READWRITE))) {
/* Use a staging texture for uploads if the underlying BO is busy. */
use_staging_texture = TRUE;
}
@@ -1059,18 +1067,9 @@ static void r600_texture_transfer_unmap(struct pipe_context *ctx,
struct pipe_transfer* transfer)
{
struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
- struct r600_common_context *rctx = (struct r600_common_context*)ctx;
- struct radeon_winsys_cs_handle *buf;
struct pipe_resource *texture = transfer->resource;
struct r600_texture *rtex = (struct r600_texture*)texture;
- if (rtransfer->staging) {
- buf = rtransfer->staging->cs_buf;
- } else {
- buf = r600_resource(transfer->resource)->cs_buf;
- }
- rctx->ws->buffer_unmap(buf);
-
if ((transfer->usage & PIPE_TRANSFER_WRITE) && rtransfer->staging) {
if (rtex->is_depth && rtex->resource.b.b.nr_samples <= 1) {
ctx->resource_copy_region(ctx, texture, transfer->level,
@@ -1262,7 +1261,9 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
/* fast color clear with 1D tiling doesn't work on old kernels and CIK */
if (tex->surface.level[0].mode == RADEON_SURF_MODE_1D &&
- rctx->chip_class >= CIK && rctx->screen->info.drm_minor < 38) {
+ rctx->chip_class >= CIK &&
+ rctx->screen->info.drm_major == 2 &&
+ rctx->screen->info.drm_minor < 38) {
continue;
}
@@ -1278,7 +1279,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
tex->cmask.offset, tex->cmask.size, 0, true);
tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
- fb_state->dirty = true;
+ rctx->set_atom_dirty(rctx, fb_state, true);
*buffers &= ~clear_bit;
}
}
diff --git a/src/gallium/drivers/radeon/r600d_common.h b/src/gallium/drivers/radeon/r600d_common.h
index 74c8d8782a6..115042d153e 100644
--- a/src/gallium/drivers/radeon/r600d_common.h
+++ b/src/gallium/drivers/radeon/r600d_common.h
@@ -66,6 +66,9 @@
#define PKT3_SET_SH_REG 0x76 /* SI and later */
#define PKT3_SET_UCONFIG_REG 0x79 /* CIK and later */
+#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS1 0x1 /* EG and later */
+#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS2 0x2 /* EG and later */
+#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS3 0x3 /* EG and later */
#define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10
#define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14
#define EVENT_TYPE_ZPASS_DONE 0x15
@@ -177,7 +180,7 @@
#define S_028804_INTERPOLATE_SRC_Z(x) (((x) & 0x1) << 19)
#define S_028804_STATIC_ANCHOR_ASSOCIATIONS(x) (((x) & 0x1) << 20)
#define S_028804_ALPHA_TO_MASK_EQAA_DISABLE(x) (((x) & 0x1) << 21)
-#define S_028804_OVERRASTERIZATION_AMOUNT(x) (((x) & 0x7) << 24)
+#define S_028804_OVERRASTERIZATION_AMOUNT(x) (((x) & 0x07) << 24)
#define S_028804_ENABLE_POSTZ_OVERRASTERIZATION(x) (((x) & 0x1) << 27)
#define CM_R_028BDC_PA_SC_LINE_CNTL 0x28bdc
#define S_028BDC_EXPAND_LINE_WIDTH(x) (((x) & 0x1) << 9)
diff --git a/src/gallium/drivers/radeon/radeon_elf_util.c b/src/gallium/drivers/radeon/radeon_elf_util.c
index 9b508227fd4..2e45d439e7a 100644
--- a/src/gallium/drivers/radeon/radeon_elf_util.c
+++ b/src/gallium/drivers/radeon/radeon_elf_util.c
@@ -103,8 +103,7 @@ static void parse_relocs(Elf *elf, Elf_Data *relocs, Elf_Data *symbols,
}
void radeon_elf_read(const char *elf_data, unsigned elf_size,
- struct radeon_shader_binary *binary,
- unsigned debug)
+ struct radeon_shader_binary *binary)
{
char *elf_buffer;
Elf *elf;
@@ -124,7 +123,6 @@ void radeon_elf_read(const char *elf_data, unsigned elf_size,
elf = elf_memory(elf_buffer, elf_size);
elf_getshdrstrndx(elf, &section_str_index);
- binary->disassembled = 0;
while ((section = elf_nextscn(elf, section))) {
const char *name;
@@ -145,12 +143,11 @@ void radeon_elf_read(const char *elf_data, unsigned elf_size,
binary->config_size = section_data->d_size;
binary->config = MALLOC(binary->config_size * sizeof(unsigned char));
memcpy(binary->config, section_data->d_buf, binary->config_size);
- } else if (debug && !strcmp(name, ".AMDGPU.disasm")) {
- binary->disassembled = 1;
+ } else if (!strcmp(name, ".AMDGPU.disasm")) {
+ /* Always read disassembly if it's available. */
section_data = elf_getdata(section, section_data);
- fprintf(stderr, "\nShader Disassembly:\n\n");
- fprintf(stderr, "%.*s\n", (int)section_data->d_size,
- (char *)section_data->d_buf);
+ binary->disasm_string = strndup(section_data->d_buf,
+ section_data->d_size);
} else if (!strncmp(name, ".rodata", 7)) {
section_data = elf_getdata(section, section_data);
binary->rodata_size = section_data->d_size;
diff --git a/src/gallium/drivers/radeon/radeon_elf_util.h b/src/gallium/drivers/radeon/radeon_elf_util.h
index ab83f98ea69..ea4ab2f14b2 100644
--- a/src/gallium/drivers/radeon/radeon_elf_util.h
+++ b/src/gallium/drivers/radeon/radeon_elf_util.h
@@ -37,7 +37,7 @@ struct radeon_shader_reloc;
* radeon_shader_binary object.
*/
void radeon_elf_read(const char *elf_data, unsigned elf_size,
- struct radeon_shader_binary *binary, unsigned debug);
+ struct radeon_shader_binary *binary);
/**
* @returns A pointer to the start of the configuration information for
diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h
index 6a9557b0b73..e967ad2214e 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -58,7 +58,6 @@ struct radeon_llvm_context {
unsigned type;
unsigned face_gpr;
unsigned two_side;
- unsigned clip_vertex;
unsigned inputs_count;
struct r600_shader_io * r600_inputs;
struct r600_shader_io * r600_outputs;
@@ -72,21 +71,6 @@ struct radeon_llvm_context {
/*=== Front end configuration ===*/
- /* Special Intrinsics */
-
- /** Write to an output register: float store_output(float, i32) */
- const char * store_output_intr;
-
- /** Swizzle a vector value: <4 x float> swizzle(<4 x float>, i32)
- * The swizzle is an unsigned integer that encodes a TGSI_SWIZZLE_* value
- * in 2-bits.
- * Swizzle{0-1} = X Channel
- * Swizzle{2-3} = Y Channel
- * Swizzle{4-5} = Z Channel
- * Swizzle{6-7} = W Channel
- */
- const char * swizzle_intr;
-
/* Instructions that are not described by any of the TGSI opcodes. */
/** This function is responsible for initializing the inputs array and will be
@@ -100,9 +84,6 @@ struct radeon_llvm_context {
unsigned index,
const struct tgsi_full_declaration *decl);
- /** User data to use with the callbacks */
- void * userdata;
-
/** This array contains the input values for the shader. Typically these
* values will be in the form of a target intrinsic that will inform the
* backend how to load the actual inputs to the shader.
@@ -146,6 +127,8 @@ static inline LLVMTypeRef tgsi2llvmtype(
case TGSI_TYPE_UNSIGNED:
case TGSI_TYPE_SIGNED:
return LLVMInt32TypeInContext(ctx);
+ case TGSI_TYPE_DOUBLE:
+ return LLVMDoubleTypeInContext(ctx);
case TGSI_TYPE_UNTYPED:
case TGSI_TYPE_FLOAT:
return LLVMFloatTypeInContext(ctx);
@@ -171,8 +154,9 @@ static inline LLVMValueRef bitcast(
void radeon_llvm_emit_prepare_cube_coords(struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data,
- LLVMValueRef *coords_arg);
+ struct lp_build_emit_data * emit_data,
+ LLVMValueRef *coords_arg,
+ LLVMValueRef *derivs_arg);
void radeon_llvm_context_init(struct radeon_llvm_context * ctx);
@@ -191,20 +175,29 @@ unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan);
void radeon_llvm_finalize_module(struct radeon_llvm_context * ctx);
-LLVMValueRef
-build_intrinsic(LLVMBuilderRef builder,
- const char *name,
- LLVMTypeRef ret_type,
- LLVMValueRef *args,
- unsigned num_args,
- LLVMAttribute attr);
-
void
build_tgsi_intrinsic_nomem(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data);
-
+LLVMValueRef
+radeon_llvm_emit_fetch_double(struct lp_build_tgsi_context *bld_base,
+ LLVMValueRef ptr,
+ LLVMValueRef ptr2);
+
+LLVMValueRef radeon_llvm_saturate(struct lp_build_tgsi_context *bld_base,
+ LLVMValueRef value);
+
+LLVMValueRef radeon_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
+ const struct tgsi_full_src_register *reg,
+ enum tgsi_opcode_type type,
+ unsigned swizzle);
+
+void radeon_llvm_emit_store(
+ struct lp_build_tgsi_context * bld_base,
+ const struct tgsi_full_instruction * inst,
+ const struct tgsi_opcode_info * info,
+ LLVMValueRef dst[4]);
#endif /* RADEON_LLVM_H */
diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c b/src/gallium/drivers/radeon/radeon_llvm_emit.c
index 25580b6bd4c..00025590137 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.c
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c
@@ -62,6 +62,8 @@ void radeon_llvm_shader_type(LLVMValueRef F, unsigned type)
switch (type) {
case TGSI_PROCESSOR_VERTEX:
+ case TGSI_PROCESSOR_TESS_CTRL:
+ case TGSI_PROCESSOR_TESS_EVAL:
llvm_type = RADEON_LLVM_SHADER_VS;
break;
case TGSI_PROCESSOR_GEOMETRY:
@@ -142,7 +144,8 @@ static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef di, void *context)
* @returns 0 for success, 1 for failure
*/
unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binary,
- const char *gpu_family, unsigned dump, LLVMTargetMachineRef tm)
+ const char *gpu_family, bool dump_ir, bool dump_asm,
+ LLVMTargetMachineRef tm)
{
char cpu[CPU_STRING_LEN];
@@ -165,17 +168,15 @@ unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binar
}
strncpy(cpu, gpu_family, CPU_STRING_LEN);
memset(fs, 0, sizeof(fs));
- if (dump) {
+ if (dump_asm)
strncpy(fs, "+DumpCode", FS_STRING_LEN);
- }
tm = LLVMCreateTargetMachine(target, triple, cpu, fs,
LLVMCodeGenLevelDefault, LLVMRelocDefault,
LLVMCodeModelDefault);
dispose_tm = true;
}
- if (dump) {
+ if (dump_ir)
LLVMDumpModule(M);
- }
/* Setup Diagnostic Handler*/
llvm_ctx = LLVMGetModuleContext(M);
@@ -204,7 +205,7 @@ unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binar
buffer_size = LLVMGetBufferSize(out_buffer);
buffer_data = LLVMGetBufferStart(out_buffer);
- radeon_elf_read(buffer_data, buffer_size, binary, dump);
+ radeon_elf_read(buffer_data, buffer_size, binary);
/* Clean up */
LLVMDisposeMemoryBuffer(out_buffer);
diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.h b/src/gallium/drivers/radeon/radeon_llvm_emit.h
index 3ccef78e36d..e20aed94c6b 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.h
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.h
@@ -29,6 +29,7 @@
#include <llvm-c/Core.h>
#include <llvm-c/TargetMachine.h>
+#include <stdbool.h>
struct radeon_shader_binary;
@@ -36,11 +37,8 @@ void radeon_llvm_shader_type(LLVMValueRef F, unsigned type);
LLVMTargetRef radeon_llvm_get_r600_target(const char *triple);
-unsigned radeon_llvm_compile(
- LLVMModuleRef M,
- struct radeon_shader_binary *binary,
- const char * gpu_family,
- unsigned dump,
- LLVMTargetMachineRef tm);
+unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binary,
+ const char *gpu_family, bool dump_ir, bool dump_asm,
+ LLVMTargetMachineRef tm);
#endif /* RADEON_LLVM_EMIT_H */
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index c8c980d9d32..56694700a47 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -109,12 +109,27 @@ emit_array_index(
return LLVMBuildAdd(gallivm->builder, addr, lp_build_const_int32(gallivm, offset), "");
}
-static LLVMValueRef
-emit_fetch(
+LLVMValueRef
+radeon_llvm_emit_fetch_double(
struct lp_build_tgsi_context *bld_base,
- const struct tgsi_full_src_register *reg,
- enum tgsi_opcode_type type,
- unsigned swizzle);
+ LLVMValueRef ptr,
+ LLVMValueRef ptr2)
+{
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ LLVMValueRef result;
+
+ result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
+
+ result = LLVMBuildInsertElement(builder,
+ result,
+ bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr),
+ bld_base->int_bld.zero, "");
+ result = LLVMBuildInsertElement(builder,
+ result,
+ bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr2),
+ bld_base->int_bld.one, "");
+ return bitcast(bld_base, TGSI_TYPE_DOUBLE, result);
+}
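/* A sketch of the packing above, with a hypothetical bits_to_f64() helper:
 * a TGSI double occupies two consecutive 32-bit channels, so fetching
 * DTEMP[0].xy on this little-endian target amounts to
 *   uint32_t lo = temp[0].x, hi = temp[0].y;
 *   double d = bits_to_f64((uint64_t)hi << 32 | lo);
 * with lane 0 of the <2 x i32> holding the low dword and lane 1 the high.
 */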
static LLVMValueRef
emit_array_fetch(
@@ -136,7 +151,7 @@ emit_array_fetch(
for (i = 0; i < size; ++i) {
tmp_reg.Register.Index = i + range.First;
- LLVMValueRef temp = emit_fetch(bld_base, &tmp_reg, type, swizzle);
+ LLVMValueRef temp = radeon_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
result = LLVMBuildInsertElement(builder, result, temp,
lp_build_const_int32(gallivm, i), "");
}
@@ -150,23 +165,21 @@ static bool uses_temp_indirect_addressing(
return (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY));
}
-static LLVMValueRef
-emit_fetch(
- struct lp_build_tgsi_context *bld_base,
- const struct tgsi_full_src_register *reg,
- enum tgsi_opcode_type type,
- unsigned swizzle)
+LLVMValueRef radeon_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
+ const struct tgsi_full_src_register *reg,
+ enum tgsi_opcode_type type,
+ unsigned swizzle)
{
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- LLVMValueRef result = NULL, ptr;
+ LLVMValueRef result = NULL, ptr, ptr2;
if (swizzle == ~0) {
LLVMValueRef values[TGSI_NUM_CHANNELS];
unsigned chan;
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
- values[chan] = emit_fetch(bld_base, reg, type, chan);
+ values[chan] = radeon_llvm_emit_fetch(bld_base, reg, type, chan);
}
return lp_build_gather_values(bld_base->base.gallivm, values,
TGSI_NUM_CHANNELS);
@@ -184,11 +197,27 @@ emit_fetch(
switch(reg->Register.File) {
case TGSI_FILE_IMMEDIATE: {
LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
- return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
+ if (type == TGSI_TYPE_DOUBLE) {
+ result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
+ result = LLVMConstInsertElement(result,
+ bld->immediates[reg->Register.Index][swizzle],
+ bld_base->int_bld.zero);
+ result = LLVMConstInsertElement(result,
+ bld->immediates[reg->Register.Index][swizzle + 1],
+ bld_base->int_bld.one);
+ return LLVMConstBitCast(result, ctype);
+ } else {
+ return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
+ }
}
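/* Note: double immediates are declared as pairs of 32-bit immediates, so
 * [swizzle] supplies the low dword and [swizzle + 1] the high dword of the
 * constant reassembled above.
 */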
case TGSI_FILE_INPUT:
result = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)];
+ if (type == TGSI_TYPE_DOUBLE) {
+ ptr = result;
+ ptr2 = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle + 1)];
+ return radeon_llvm_emit_fetch_double(bld_base, ptr, ptr2);
+ }
break;
case TGSI_FILE_TEMPORARY:
@@ -199,11 +228,23 @@ emit_fetch(
break;
}
ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
+ if (type == TGSI_TYPE_DOUBLE) {
+ ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
+ return radeon_llvm_emit_fetch_double(bld_base,
+ LLVMBuildLoad(builder, ptr, ""),
+ LLVMBuildLoad(builder, ptr2, ""));
+ }
result = LLVMBuildLoad(builder, ptr, "");
break;
case TGSI_FILE_OUTPUT:
ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle);
+ if (type == TGSI_TYPE_DOUBLE) {
+ ptr2 = lp_get_output_ptr(bld, reg->Register.Index, swizzle + 1);
+ return radeon_llvm_emit_fetch_double(bld_base,
+ LLVMBuildLoad(builder, ptr, ""),
+ LLVMBuildLoad(builder, ptr2, ""));
+ }
result = LLVMBuildLoad(builder, ptr, "");
break;
@@ -321,8 +362,8 @@ static void emit_declaration(
}
}
-static LLVMValueRef radeon_llvm_saturate(struct lp_build_tgsi_context *bld_base,
- LLVMValueRef value)
+LLVMValueRef radeon_llvm_saturate(struct lp_build_tgsi_context *bld_base,
+ LLVMValueRef value)
{
struct lp_build_emit_data clamp_emit_data;
@@ -336,8 +377,7 @@ static LLVMValueRef radeon_llvm_saturate(struct lp_build_tgsi_context *bld_base,
&clamp_emit_data);
}
-static void
-emit_store(
+void radeon_llvm_emit_store(
struct lp_build_tgsi_context * bld_base,
const struct tgsi_full_instruction * inst,
const struct tgsi_opcode_info * info,
@@ -348,9 +388,10 @@ emit_store(
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
const struct tgsi_full_dst_register *reg = &inst->Dst[0];
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
- LLVMValueRef temp_ptr;
+ LLVMValueRef temp_ptr, temp_ptr2 = NULL;
unsigned chan, chan_index;
boolean is_vec_store = FALSE;
+ enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
if (dst[0]) {
LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
@@ -371,6 +412,8 @@ emit_store(
TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
LLVMValueRef value = dst[chan_index];
+ if (dtype == TGSI_TYPE_DOUBLE && (chan_index == 1 || chan_index == 3))
+ continue;
if (inst->Instruction.Saturate)
value = radeon_llvm_saturate(bld_base, value);
@@ -379,8 +422,9 @@ emit_store(
LLVMBuildStore(builder, value, temp_ptr);
continue;
}
-
- value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
+
+ if (dtype != TGSI_TYPE_DOUBLE)
+ value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
if (reg->Register.Indirect) {
struct tgsi_declaration_range range = get_array_range(bld_base,
@@ -418,6 +462,8 @@ emit_store(
switch(reg->Register.File) {
case TGSI_FILE_OUTPUT:
temp_ptr = bld->outputs[reg->Register.Index][chan_index];
+ if (dtype == TGSI_TYPE_DOUBLE)
+ temp_ptr2 = bld->outputs[reg->Register.Index][chan_index + 1];
break;
case TGSI_FILE_TEMPORARY:
@@ -428,12 +474,28 @@ emit_store(
break;
}
temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
+ if (dtype == TGSI_TYPE_DOUBLE)
+ temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];
+
break;
default:
return;
}
- LLVMBuildStore(builder, value, temp_ptr);
+ if (dtype != TGSI_TYPE_DOUBLE)
+ LLVMBuildStore(builder, value, temp_ptr);
+ else {
+ LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
+ LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), 2), "");
+ LLVMValueRef val2;
+ value = LLVMBuildExtractElement(builder, ptr,
+ bld_base->uint_bld.zero, "");
+ val2 = LLVMBuildExtractElement(builder, ptr,
+ bld_base->uint_bld.one, "");
+
+ LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, value), temp_ptr);
+ LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, val2), temp_ptr2);
+ }
}
}
}
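/* A sketch of the store path above, mirroring the fetch side: a double
 * written to TEMP[0].xy is split back into dwords (f64_to_bits() is
 * illustrative only):
 *   uint64_t bits = f64_to_bits(d);
 *   temp[0].x = (uint32_t)bits;         // temp_ptr
 *   temp[0].y = (uint32_t)(bits >> 32); // temp_ptr2
 * which is also why channels 1 and 3 are skipped at the top of the loop;
 * they never carry an independent double value.
 */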
@@ -686,34 +748,26 @@ static void kil_emit(
}
}
-void radeon_llvm_emit_prepare_cube_coords(
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data,
- LLVMValueRef *coords_arg)
+static void radeon_llvm_cube_to_2d_coords(struct lp_build_tgsi_context *bld_base,
+ LLVMValueRef *in, LLVMValueRef *out)
{
-
- unsigned target = emit_data->inst->Texture.Texture;
- unsigned opcode = emit_data->inst->Instruction.Opcode;
struct gallivm_state * gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
LLVMTypeRef type = bld_base->base.elem_type;
LLVMValueRef coords[4];
LLVMValueRef mad_args[3];
- LLVMValueRef idx;
- struct LLVMOpaqueValue *cube_vec;
- LLVMValueRef v;
+ LLVMValueRef v, cube_vec;
unsigned i;
- cube_vec = lp_build_gather_values(bld_base->base.gallivm, coords_arg, 4);
- v = build_intrinsic(builder, "llvm.AMDGPU.cube", LLVMVectorType(type, 4),
+ cube_vec = lp_build_gather_values(bld_base->base.gallivm, in, 4);
+ v = lp_build_intrinsic(builder, "llvm.AMDGPU.cube", LLVMVectorType(type, 4),
&cube_vec, 1, LLVMReadNoneAttribute);
- for (i = 0; i < 4; ++i) {
- idx = lp_build_const_int32(gallivm, i);
- coords[i] = LLVMBuildExtractElement(builder, v, idx, "");
- }
+ for (i = 0; i < 4; ++i)
+ coords[i] = LLVMBuildExtractElement(builder, v,
+ lp_build_const_int32(gallivm, i), "");
- coords[2] = build_intrinsic(builder, "fabs",
+ coords[2] = lp_build_intrinsic(builder, "llvm.fabs.f32",
type, &coords[2], 1, LLVMReadNoneAttribute);
coords[2] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_RCP, coords[2]);
@@ -729,10 +783,60 @@ void radeon_llvm_emit_prepare_cube_coords(
mad_args[0], mad_args[1], mad_args[2]);
/* apply xyz = yxw swizzle to coords */
- coords[2] = coords[3];
- coords[3] = coords[1];
- coords[1] = coords[0];
- coords[0] = coords[3];
+ out[0] = coords[1];
+ out[1] = coords[0];
+ out[2] = coords[3];
+}
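/* Note (roughly): llvm.AMDGPU.cube returns the selected face's coordinates
 * together with the major-axis magnitude and face index; the RCP/MAD
 * sequence above normalizes the coordinates by |ma| and biases them into
 * face space, and the final swizzle orders the result as (s, t, face).
 */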
+
+void radeon_llvm_emit_prepare_cube_coords(
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data,
+ LLVMValueRef *coords_arg,
+ LLVMValueRef *derivs_arg)
+{
+
+ unsigned target = emit_data->inst->Texture.Texture;
+ unsigned opcode = emit_data->inst->Instruction.Opcode;
+ struct gallivm_state * gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef coords[4];
+ unsigned i;
+
+ radeon_llvm_cube_to_2d_coords(bld_base, coords_arg, coords);
+
+ if (opcode == TGSI_OPCODE_TXD && derivs_arg) {
+ LLVMValueRef derivs[4];
+ int axis;
+
+ /* Convert cube derivatives to 2D derivatives. */
+ for (axis = 0; axis < 2; axis++) {
+ LLVMValueRef shifted_cube_coords[4], shifted_coords[4];
+
+ /* Shift the cube coordinates by the derivatives to get
+ * the cube coordinates of the "neighboring pixel".
+ */
+ for (i = 0; i < 3; i++)
+ shifted_cube_coords[i] =
+ LLVMBuildFAdd(builder, coords_arg[i],
+ derivs_arg[axis*3+i], "");
+ shifted_cube_coords[3] = LLVMGetUndef(bld_base->base.elem_type);
+
+ /* Project the shifted cube coordinates onto the face. */
+ radeon_llvm_cube_to_2d_coords(bld_base, shifted_cube_coords,
+ shifted_coords);
+
+ /* Subtract both sets of 2D coordinates to get 2D derivatives.
+ * This won't work if the shifted coordinates ended up
+ * in a different face.
+ */
+ for (i = 0; i < 2; i++)
+ derivs[axis * 2 + i] =
+ LLVMBuildFSub(builder, shifted_coords[i],
+ coords[i], "");
+ }
+
+ memcpy(derivs_arg, derivs, sizeof(derivs));
+ }
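/* Note: this is a finite-difference approximation; per axis it computes
 *   dPdx(face_coord) ~= project(cube_coord + dPdx(cube_coord))
 *                       - project(cube_coord)
 * which, as noted above, only holds while both samples land on the same
 * cube face.
 */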
if (target == TGSI_TEXTURE_CUBE_ARRAY ||
target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
@@ -756,140 +860,6 @@ void radeon_llvm_emit_prepare_cube_coords(
memcpy(coords_arg, coords, sizeof(coords));
}
-static void txd_fetch_args(
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- const struct tgsi_full_instruction * inst = emit_data->inst;
-
- LLVMValueRef coords[4];
- unsigned chan, src;
- for (src = 0; src < 3; src++) {
- for (chan = 0; chan < 4; chan++)
- coords[chan] = lp_build_emit_fetch(bld_base, inst, src, chan);
-
- emit_data->args[src] = lp_build_gather_values(bld_base->base.gallivm,
- coords, 4);
- }
- emit_data->arg_count = 3;
- emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
-}
-
-
-static void txp_fetch_args(
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- const struct tgsi_full_instruction * inst = emit_data->inst;
- LLVMValueRef src_w;
- unsigned chan;
- LLVMValueRef coords[5];
-
- emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
- src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
-
- for (chan = 0; chan < 3; chan++ ) {
- LLVMValueRef arg = lp_build_emit_fetch(bld_base,
- emit_data->inst, 0, chan);
- coords[chan] = lp_build_emit_llvm_binary(bld_base,
- TGSI_OPCODE_DIV, arg, src_w);
- }
- coords[3] = bld_base->base.one;
-
- if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
- inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
- inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
- inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
- inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
- inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
- radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);
- }
-
- emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
- coords, 4);
- emit_data->arg_count = 1;
-}
-
-static void tex_fetch_args(
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- /* XXX: lp_build_swizzle_aos() was failing with wrong arg types,
- * when we used CHAN_ALL. We should be able to get this to work,
- * but for now we will swizzle it ourselves
- emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
- 0, CHAN_ALL);
-
- */
-
- const struct tgsi_full_instruction * inst = emit_data->inst;
-
- LLVMValueRef coords[5];
- unsigned chan;
- for (chan = 0; chan < 4; chan++) {
- coords[chan] = lp_build_emit_fetch(bld_base, inst, 0, chan);
- }
-
- if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 ||
- inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
- inst->Instruction.Opcode == TGSI_OPCODE_TXL2) {
- /* These instructions have additional operand that should be packed
- * into the cube coord vector by radeon_llvm_emit_prepare_cube_coords.
- * That operand should be passed as a float value in the args array
- * right after the coord vector. After packing it's not used anymore,
- * that's why arg_count is not increased */
- coords[4] = lp_build_emit_fetch(bld_base, inst, 1, 0);
- }
-
- if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
- inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
- inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
- inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
- inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
- inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
- radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);
- }
-
- emit_data->arg_count = 1;
- emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
- coords, 4);
- emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
-}
-
-static void txf_fetch_args(
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- const struct tgsi_full_instruction * inst = emit_data->inst;
- struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
- const struct tgsi_texture_offset * off = inst->TexOffsets;
- LLVMTypeRef offset_type = bld_base->int_bld.elem_type;
-
- /* fetch tex coords */
- tex_fetch_args(bld_base, emit_data);
-
- /* fetch tex offsets */
- if (inst->Texture.NumOffsets) {
- assert(inst->Texture.NumOffsets == 1);
-
- emit_data->args[1] = LLVMConstBitCast(
- bld->immediates[off->Index][off->SwizzleX],
- offset_type);
- emit_data->args[2] = LLVMConstBitCast(
- bld->immediates[off->Index][off->SwizzleY],
- offset_type);
- emit_data->args[3] = LLVMConstBitCast(
- bld->immediates[off->Index][off->SwizzleZ],
- offset_type);
- } else {
- emit_data->args[1] = bld_base->int_bld.zero;
- emit_data->args[2] = bld_base->int_bld.zero;
- emit_data->args[3] = bld_base->int_bld.zero;
- }
-
- emit_data->arg_count = 4;
-}
-
static void emit_icmp(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
@@ -996,6 +966,35 @@ static void emit_fcmp(
emit_data->output[emit_data->chan] = v;
}
+static void emit_dcmp(
+ const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ LLVMContextRef context = bld_base->base.gallivm->context;
+ LLVMRealPredicate pred;
+
+ /* Use ordered comparisons for everything but NE, as is usual
+ * for float comparisons.
+ */
+ switch (emit_data->inst->Instruction.Opcode) {
+ case TGSI_OPCODE_DSEQ: pred = LLVMRealOEQ; break;
+ case TGSI_OPCODE_DSGE: pred = LLVMRealOGE; break;
+ case TGSI_OPCODE_DSLT: pred = LLVMRealOLT; break;
+ case TGSI_OPCODE_DSNE: pred = LLVMRealUNE; break;
+ default: assert(!"unknown instruction"); pred = 0; break;
+ }
+
+ LLVMValueRef v = LLVMBuildFCmp(builder, pred,
+ emit_data->args[0], emit_data->args[1],"");
+
+ v = LLVMBuildSExtOrBitCast(builder, v,
+ LLVMInt32TypeInContext(context), "");
+
+ emit_data->output[emit_data->chan] = v;
+}
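/* Note: the ordered/unordered split preserves IEEE NaN semantics; with a
 * NaN operand, OEQ/OGE/OLT all yield false while UNE yields true, so
 * DSNE(NaN, x) is true. The i1 result is sign-extended to the usual TGSI
 * boolean encoding of 0 / 0xffffffff.
 */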
+
static void emit_not(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
@@ -1161,6 +1160,40 @@ static void emit_ineg(
emit_data->args[0], "");
}
+static void emit_dneg(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ emit_data->output[emit_data->chan] = LLVMBuildFNeg(builder,
+ emit_data->args[0], "");
+}
+
+static void emit_frac(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ char *intr;
+
+ if (emit_data->info->opcode == TGSI_OPCODE_FRC)
+ intr = "llvm.floor.f32";
+ else if (emit_data->info->opcode == TGSI_OPCODE_DFRAC)
+ intr = "llvm.floor.f64";
+ else {
+ assert(0);
+ return;
+ }
+
+ LLVMValueRef floor = lp_build_intrinsic(builder, intr, emit_data->dst_type,
+ &emit_data->args[0], 1,
+ LLVMReadNoneAttribute);
+ emit_data->output[emit_data->chan] = LLVMBuildFSub(builder,
+ emit_data->args[0], floor, "");
+}
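/* Note: this open-codes frac(x) = x - floor(x) for both f32 and f64, e.g.
 * frac(-1.25) = -1.25 - (-2.0) = 0.75, so the result always lies in [0, 1).
 */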
+
static void emit_f2i(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
@@ -1215,58 +1248,16 @@ static void emit_immediate(struct lp_build_tgsi_context * bld_base,
ctx->soa.num_immediates++;
}
-LLVMValueRef
-build_intrinsic(LLVMBuilderRef builder,
- const char *name,
- LLVMTypeRef ret_type,
- LLVMValueRef *args,
- unsigned num_args,
- LLVMAttribute attr)
-{
- LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
- LLVMValueRef function;
-
- function = LLVMGetNamedFunction(module, name);
- if(!function) {
- LLVMTypeRef arg_types[LP_MAX_FUNC_ARGS];
- unsigned i;
-
- assert(num_args <= LP_MAX_FUNC_ARGS);
-
- for(i = 0; i < num_args; ++i) {
- assert(args[i]);
- arg_types[i] = LLVMTypeOf(args[i]);
- }
-
- function = lp_declare_intrinsic(module, name, ret_type, arg_types, num_args);
-
- if (attr)
- LLVMAddFunctionAttr(function, attr);
- }
-
- return LLVMBuildCall(builder, function, args, num_args, "");
-}
-
-static void build_tgsi_intrinsic(
- const struct lp_build_tgsi_action * action,
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data,
- LLVMAttribute attr)
-{
- struct lp_build_context * base = &bld_base->base;
- emit_data->output[emit_data->chan] = build_intrinsic(
- base->gallivm->builder, action->intr_name,
- emit_data->dst_type, emit_data->args,
- emit_data->arg_count, attr);
-}
-
void
-build_tgsi_intrinsic_nomem(
- const struct lp_build_tgsi_action * action,
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
+build_tgsi_intrinsic_nomem(const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
{
- build_tgsi_intrinsic(action, bld_base, emit_data, LLVMReadNoneAttribute);
+ struct lp_build_context * base = &bld_base->base;
+ emit_data->output[emit_data->chan] =
+ lp_build_intrinsic(base->gallivm->builder, action->intr_name,
+ emit_data->dst_type, emit_data->args,
+ emit_data->arg_count, LLVMReadNoneAttribute);
}
static void emit_bfi(const struct lp_build_tgsi_action * action,
@@ -1322,7 +1313,7 @@ static void emit_lsb(const struct lp_build_tgsi_action * action,
};
emit_data->output[emit_data->chan] =
- build_intrinsic(gallivm->builder, "llvm.cttz.i32",
+ lp_build_intrinsic(gallivm->builder, "llvm.cttz.i32",
emit_data->dst_type, args, Elements(args),
LLVMReadNoneAttribute);
}
@@ -1341,7 +1332,7 @@ static void emit_umsb(const struct lp_build_tgsi_action * action,
};
LLVMValueRef msb =
- build_intrinsic(builder, "llvm.ctlz.i32",
+ lp_build_intrinsic(builder, "llvm.ctlz.i32",
emit_data->dst_type, args, Elements(args),
LLVMReadNoneAttribute);
@@ -1368,7 +1359,7 @@ static void emit_imsb(const struct lp_build_tgsi_action * action,
LLVMValueRef arg = emit_data->args[0];
LLVMValueRef msb =
- build_intrinsic(builder, "llvm.AMDGPU.flbit.i32",
+ lp_build_intrinsic(builder, "llvm.AMDGPU.flbit.i32",
emit_data->dst_type, &arg, 1,
LLVMReadNoneAttribute);
@@ -1407,12 +1398,8 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
ctx->gallivm.context);
ctx->gallivm.builder = LLVMCreateBuilderInContext(ctx->gallivm.context);
- ctx->store_output_intr = "llvm.AMDGPU.store.output.";
- ctx->swizzle_intr = "llvm.AMDGPU.swizzle";
struct lp_build_tgsi_context * bld_base = &ctx->soa.bld_base;
- /* XXX: We need to revisit this.I think the correct way to do this is
- * to use length = 4 here and use the elem_bld for everything. */
type.floating = TRUE;
type.fixed = FALSE;
type.sign = TRUE;
@@ -1423,28 +1410,32 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
lp_build_context_init(&ctx->soa.bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
lp_build_context_init(&ctx->soa.bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
+ {
+ struct lp_type dbl_type;
+ dbl_type = type;
+ dbl_type.width *= 2;
+ lp_build_context_init(&ctx->soa.bld_base.dbl_bld, &ctx->gallivm, dbl_type);
+ }
bld_base->soa = 1;
- bld_base->emit_store = emit_store;
+ bld_base->emit_store = radeon_llvm_emit_store;
bld_base->emit_swizzle = emit_swizzle;
bld_base->emit_declaration = emit_declaration;
bld_base->emit_immediate = emit_immediate;
- bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch;
- bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch;
- bld_base->emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch;
- bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = emit_fetch;
+ bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = radeon_llvm_emit_fetch;
+ bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = radeon_llvm_emit_fetch;
+ bld_base->emit_fetch_funcs[TGSI_FILE_TEMPORARY] = radeon_llvm_emit_fetch;
+ bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = radeon_llvm_emit_fetch;
bld_base->emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
/* Allocate outputs */
ctx->soa.outputs = ctx->outputs;
- /* XXX: Is there a better way to initialize all this ? */
-
lp_set_default_actions(bld_base);
bld_base->op_actions[TGSI_OPCODE_ABS].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_ABS].intr_name = "fabs";
+ bld_base->op_actions[TGSI_OPCODE_ABS].intr_name = "llvm.fabs.f32";
bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl;
bld_base->op_actions[TGSI_OPCODE_BFI].emit = emit_bfi;
@@ -1453,7 +1444,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
bld_base->op_actions[TGSI_OPCODE_BREV].intr_name = "llvm.AMDGPU.brev";
bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
bld_base->op_actions[TGSI_OPCODE_CEIL].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "ceil";
+ bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "llvm.ceil.f32";
bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_CLAMP].intr_name = "llvm.AMDIL.clamp.";
bld_base->op_actions[TGSI_OPCODE_CMP].emit = build_tgsi_intrinsic_nomem;
@@ -1461,21 +1452,30 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32";
- bld_base->op_actions[TGSI_OPCODE_DDX].intr_name = "llvm.AMDGPU.ddx";
- bld_base->op_actions[TGSI_OPCODE_DDX].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_DDY].intr_name = "llvm.AMDGPU.ddy";
- bld_base->op_actions[TGSI_OPCODE_DDY].fetch_args = tex_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_DABS].emit = build_tgsi_intrinsic_nomem;
+ bld_base->op_actions[TGSI_OPCODE_DABS].intr_name = "llvm.fabs.f64";
+ bld_base->op_actions[TGSI_OPCODE_DFMA].emit = build_tgsi_intrinsic_nomem;
+ bld_base->op_actions[TGSI_OPCODE_DFMA].intr_name = "llvm.fma.f64";
+ bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = emit_frac;
+ bld_base->op_actions[TGSI_OPCODE_DNEG].emit = emit_dneg;
+ bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = emit_dcmp;
+ bld_base->op_actions[TGSI_OPCODE_DSGE].emit = emit_dcmp;
+ bld_base->op_actions[TGSI_OPCODE_DSLT].emit = emit_dcmp;
+ bld_base->op_actions[TGSI_OPCODE_DSNE].emit = emit_dcmp;
+ bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = build_tgsi_intrinsic_nomem;
+ bld_base->op_actions[TGSI_OPCODE_DRSQ].intr_name = "llvm.AMDGPU.rsq.f64";
+ bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = build_tgsi_intrinsic_nomem;
+ bld_base->op_actions[TGSI_OPCODE_DSQRT].intr_name = "llvm.sqrt.f64";
bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.AMDIL.exp.";
bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "floor";
+ bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.floor.f32";
bld_base->op_actions[TGSI_OPCODE_FMA].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_FMA].intr_name = "llvm.fma.f32";
- bld_base->op_actions[TGSI_OPCODE_FRC].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_FRC].intr_name = "llvm.AMDIL.fraction.";
+ bld_base->op_actions[TGSI_OPCODE_FRC].emit = emit_frac;
bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i;
bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u;
bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = emit_fcmp;
@@ -1520,6 +1520,9 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32";
bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.AMDIL.round.nearest.";
+ bld_base->op_actions[TGSI_OPCODE_RSQ].intr_name =
+ HAVE_LLVM >= 0x0305 ? "llvm.AMDGPU.rsq.clamped.f32" : "llvm.AMDGPU.rsq";
+ bld_base->op_actions[TGSI_OPCODE_RSQ].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_cmp;
bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_cmp;
bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl;
@@ -1532,26 +1535,6 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
bld_base->op_actions[TGSI_OPCODE_SQRT].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_SQRT].intr_name = "llvm.sqrt.f32";
bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg;
- bld_base->op_actions[TGSI_OPCODE_TEX].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TEX].intr_name = "llvm.AMDGPU.tex";
- bld_base->op_actions[TGSI_OPCODE_TEX2].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TEX2].intr_name = "llvm.AMDGPU.tex";
- bld_base->op_actions[TGSI_OPCODE_TXB].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TXB].intr_name = "llvm.AMDGPU.txb";
- bld_base->op_actions[TGSI_OPCODE_TXB2].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TXB2].intr_name = "llvm.AMDGPU.txb";
- bld_base->op_actions[TGSI_OPCODE_TXD].fetch_args = txd_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TXD].intr_name = "llvm.AMDGPU.txd";
- bld_base->op_actions[TGSI_OPCODE_TXF].fetch_args = txf_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TXF].intr_name = "llvm.AMDGPU.txf";
- bld_base->op_actions[TGSI_OPCODE_TXL].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TXL].intr_name = "llvm.AMDGPU.txl";
- bld_base->op_actions[TGSI_OPCODE_TXL2].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TXL2].intr_name = "llvm.AMDGPU.txl";
- bld_base->op_actions[TGSI_OPCODE_TXP].fetch_args = txp_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TXP].intr_name = "llvm.AMDGPU.tex";
- bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TXQ].intr_name = "llvm.AMDGPU.txq";
bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.AMDGPU.trunc";
bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
@@ -1571,13 +1554,6 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
-
- bld_base->rsq_action.emit = build_tgsi_intrinsic_nomem;
-#if HAVE_LLVM >= 0x0305
- bld_base->rsq_action.intr_name = "llvm.AMDGPU.rsq.clamped.f32";
-#else
- bld_base->rsq_action.intr_name = "llvm.AMDGPU.rsq";
-#endif
}
void radeon_llvm_create_func(struct radeon_llvm_context * ctx,
diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c
index be58d0b9ce3..16ee5410273 100644
--- a/src/gallium/drivers/radeon/radeon_uvd.c
+++ b/src/gallium/drivers/radeon/radeon_uvd.c
@@ -57,6 +57,7 @@
#define FB_BUFFER_OFFSET 0x1000
#define FB_BUFFER_SIZE 2048
+#define IT_SCALING_TABLE_SIZE 992
/* UVD decoder representation */
struct ruvd_decoder {
@@ -65,6 +66,7 @@ struct ruvd_decoder {
ruvd_set_dtb set_dtb;
unsigned stream_handle;
+ unsigned stream_type;
unsigned frame_number;
struct pipe_screen *screen;
@@ -73,15 +75,18 @@ struct ruvd_decoder {
unsigned cur_buffer;
- struct rvid_buffer msg_fb_buffers[NUM_BUFFERS];
+ struct rvid_buffer msg_fb_it_buffers[NUM_BUFFERS];
struct ruvd_msg *msg;
uint32_t *fb;
+ uint8_t *it;
struct rvid_buffer bs_buffers[NUM_BUFFERS];
void* bs_ptr;
unsigned bs_size;
struct rvid_buffer dpb;
+ bool use_legacy;
+ struct rvid_buffer ctx;
};
/* flush IB to the hardware */
@@ -107,19 +112,34 @@ static void send_cmd(struct ruvd_decoder *dec, unsigned cmd,
reloc_idx = dec->ws->cs_add_reloc(dec->cs, cs_buf, usage, domain,
RADEON_PRIO_MIN);
- set_reg(dec, RUVD_GPCOM_VCPU_DATA0, off);
- set_reg(dec, RUVD_GPCOM_VCPU_DATA1, reloc_idx * 4);
+ if (!dec->use_legacy) {
+ uint64_t addr;
+ addr = dec->ws->buffer_get_virtual_address(cs_buf);
+ addr = addr + off;
+ set_reg(dec, RUVD_GPCOM_VCPU_DATA0, addr);
+ set_reg(dec, RUVD_GPCOM_VCPU_DATA1, addr >> 32);
+ } else {
+ set_reg(dec, RUVD_GPCOM_VCPU_DATA0, off);
+ set_reg(dec, RUVD_GPCOM_VCPU_DATA1, reloc_idx * 4);
+ }
set_reg(dec, RUVD_GPCOM_VCPU_CMD, cmd << 1);
}
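/* An illustrative value for the non-legacy path above: a 64-bit GPU virtual
 * address such as addr = 0x123456000 is split into DATA0 = 0x23456000 and
 * DATA1 = 0x1, whereas the legacy path passes the buffer offset and a
 * relocation index instead.
 */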
-/* map the next available message/feedback buffer */
-static void map_msg_fb_buf(struct ruvd_decoder *dec)
+/* does the codec need an IT buffer? */
+static bool have_it(struct ruvd_decoder *dec)
+{
+ return dec->stream_type == RUVD_CODEC_H264_PERF ||
+ dec->stream_type == RUVD_CODEC_H265;
+}
+
+/* map the next available message/feedback/IT scaling buffer */
+static void map_msg_fb_it_buf(struct ruvd_decoder *dec)
{
struct rvid_buffer* buf;
uint8_t *ptr;
/* grab the current message/feedback buffer */
- buf = &dec->msg_fb_buffers[dec->cur_buffer];
+ buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
/* and map it for CPU access */
ptr = dec->ws->buffer_map(buf->res->cs_buf, dec->cs, PIPE_TRANSFER_WRITE);
@@ -127,6 +147,8 @@ static void map_msg_fb_buf(struct ruvd_decoder *dec)
/* calc buffer offsets */
dec->msg = (struct ruvd_msg *)ptr;
dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET);
+ if (have_it(dec))
+ dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + FB_BUFFER_SIZE);
}
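/* Note: per the offsets above, a single msg/fb/it buffer is laid out as
 *   [0x0000, 0x1000)   struct ruvd_msg          (dec->msg)
 *   [0x1000, 0x1800)   feedback, FB_BUFFER_SIZE (dec->fb)
 *   [0x1800 onwards)   IT scaling tables        (dec->it)
 * with the IT region only mapped when have_it() is true.
 */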
/* unmap and send a message command to the VCPU */
@@ -139,12 +161,13 @@ static void send_msg_buf(struct ruvd_decoder *dec)
return;
/* grab the current message buffer */
- buf = &dec->msg_fb_buffers[dec->cur_buffer];
+ buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
/* unmap the buffer */
dec->ws->buffer_unmap(buf->res->cs_buf);
dec->msg = NULL;
dec->fb = NULL;
+ dec->it = NULL;
/* and send it to the hardware */
send_cmd(dec, RUVD_CMD_MSG_BUFFER, buf->res->cs_buf, 0,
@@ -159,11 +182,12 @@ static void next_buffer(struct ruvd_decoder *dec)
}
/* convert the profile into something UVD understands */
-static uint32_t profile2stream_type(enum pipe_video_profile profile)
+static uint32_t profile2stream_type(struct ruvd_decoder *dec, unsigned family)
{
- switch (u_reduce_video_profile(profile)) {
+ switch (u_reduce_video_profile(dec->base.profile)) {
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
- return RUVD_CODEC_H264;
+ return (family >= CHIP_TONGA) ?
+ RUVD_CODEC_H264_PERF : RUVD_CODEC_H264;
case PIPE_VIDEO_FORMAT_VC1:
return RUVD_CODEC_VC1;
@@ -174,23 +198,46 @@ static uint32_t profile2stream_type(enum pipe_video_profile profile)
case PIPE_VIDEO_FORMAT_MPEG4:
return RUVD_CODEC_MPEG4;
+ case PIPE_VIDEO_FORMAT_HEVC:
+ return RUVD_CODEC_H265;
+
default:
assert(0);
return 0;
}
}
+static unsigned calc_ctx_size(struct ruvd_decoder *dec)
+{
+ unsigned width_in_mb, height_in_mb, ctx_size;
+
+ unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
+ unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
+
+ unsigned max_references = dec->base.max_references + 1;
+
+ if (dec->base.width * dec->base.height >= 4096*2000)
+ max_references = MAX2(max_references, 8);
+ else
+ max_references = MAX2(max_references, 17);
+
+ width = align (width, 16);
+ height = align (height, 16);
+ ctx_size = ((width + 255) / 16)*((height + 255) / 16) * 16 * max_references + 52 * 1024;
+ return ctx_size;
+}
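/* A worked example for a hypothetical 1920x1088 stream (max_references
 * clamped to 17 by the branch above):
 *   ((1920+255)/16) * ((1088+255)/16) * 16 = 135 * 83 * 16 = 179280
 * bytes per reference, so ctx_size = 17 * 179280 + 52*1024, about 2.96 MiB.
 */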
+
/* calculate size of reference picture buffer */
-static unsigned calc_dpb_size(const struct pipe_video_codec *templ)
+static unsigned calc_dpb_size(struct ruvd_decoder *dec)
{
unsigned width_in_mb, height_in_mb, image_size, dpb_size;
// always align them to MB size for dpb calculation
- unsigned width = align(templ->width, VL_MACROBLOCK_WIDTH);
- unsigned height = align(templ->height, VL_MACROBLOCK_HEIGHT);
+ unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
+ unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
// always one more for currently decoded picture
- unsigned max_references = templ->max_references + 1;
+ unsigned max_references = dec->base.max_references + 1;
// aligned size of a single frame
image_size = width * height;
@@ -201,19 +248,67 @@ static unsigned calc_dpb_size(const struct pipe_video_codec *templ)
width_in_mb = width / VL_MACROBLOCK_WIDTH;
height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2);
- switch (u_reduce_video_profile(templ->profile)) {
- case PIPE_VIDEO_FORMAT_MPEG4_AVC:
- // the firmware seems to allways assume a minimum of ref frames
- max_references = MAX2(NUM_H264_REFS, max_references);
-
- // reference picture buffer
- dpb_size = image_size * max_references;
+ switch (u_reduce_video_profile(dec->base.profile)) {
+ case PIPE_VIDEO_FORMAT_MPEG4_AVC: {
+ if (!dec->use_legacy) {
+ unsigned fs_in_mb = width_in_mb * height_in_mb;
+ unsigned alignment = 64, num_dpb_buffer;
+
+ if (dec->stream_type == RUVD_CODEC_H264_PERF)
+ alignment = 256;
+ switch(dec->base.level) {
+ case 30:
+ num_dpb_buffer = 8100 / fs_in_mb;
+ break;
+ case 31:
+ num_dpb_buffer = 18000 / fs_in_mb;
+ break;
+ case 32:
+ num_dpb_buffer = 20480 / fs_in_mb;
+ break;
+ case 41:
+ num_dpb_buffer = 32768 / fs_in_mb;
+ break;
+ case 42:
+ num_dpb_buffer = 34816 / fs_in_mb;
+ break;
+ case 50:
+ num_dpb_buffer = 110400 / fs_in_mb;
+ break;
+ case 51:
+ num_dpb_buffer = 184320 / fs_in_mb;
+ break;
+ default:
+ num_dpb_buffer = 184320 / fs_in_mb;
+ break;
+ }
+ num_dpb_buffer++;
+ max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references);
+ dpb_size = image_size * max_references;
+ dpb_size += max_references * align(width_in_mb * height_in_mb * 192, alignment);
+ dpb_size += align(width_in_mb * height_in_mb * 32, alignment);
+ } else {
+ // the firmware seems to always assume a minimum of ref frames
+ max_references = MAX2(NUM_H264_REFS, max_references);
+ // reference picture buffer
+ dpb_size = image_size * max_references;
+ // macroblock context buffer
+ dpb_size += width_in_mb * height_in_mb * max_references * 192;
+ // IT surface buffer
+ dpb_size += width_in_mb * height_in_mb * 32;
+ }
+ break;
+ }
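/* A worked example for the level table above (it is MaxDpbMbs / frame size
 * in macroblocks from H.264 Annex A): level 41 at 1920x1088 gives
 * fs_in_mb = 120 * 68 = 8160, so num_dpb_buffer = 32768/8160 + 1 = 5 and
 * max_references becomes MAX2(MIN2(NUM_H264_REFS, 5), max_references).
 */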
- // macroblock context buffer
- dpb_size += width_in_mb * height_in_mb * max_references * 192;
+ case PIPE_VIDEO_FORMAT_HEVC:
+ if (dec->base.width * dec->base.height >= 4096*2000)
+ max_references = MAX2(max_references, 8);
+ else
+ max_references = MAX2(max_references, 17);
- // IT surface buffer
- dpb_size += width_in_mb * height_in_mb * 32;
+ width = align (width, 16);
+ height = align (height, 16);
+ dpb_size = align((width * height * 3) / 2, 256) * max_references;
break;
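/* Note: the HEVC DPB holds raw NV12 frames at 3/2 bytes per pixel, each
 * aligned to 256 bytes; e.g. 1920x1088 gives align(1920*1088*3/2, 256) =
 * 3133440 bytes per reference.
 */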
case PIPE_VIDEO_FORMAT_VC1:
@@ -250,6 +345,8 @@ static unsigned calc_dpb_size(const struct pipe_video_codec *templ)
// IT surface buffer
dpb_size += align(width_in_mb * height_in_mb * 32, 64);
+
+ dpb_size = MAX2(dpb_size, 30 * 1024 * 1024);
break;
default:
@@ -263,6 +360,12 @@ static unsigned calc_dpb_size(const struct pipe_video_codec *templ)
return dpb_size;
}
+/* free associated data in the video buffer callback */
+static void ruvd_destroy_associated_data(void *data)
+{
+ /* NOOP, since we only use an intptr */
+}
+
/* get h264 specific message bits */
static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_picture_desc *pic)
{
@@ -286,10 +389,8 @@ static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_
assert(0);
break;
}
- if (((dec->base.width * dec->base.height) >> 8) <= 1620)
- result.level = 30;
- else
- result.level = 41;
+
+ result.level = dec->base.level;
result.sps_info_flags = 0;
result.sps_info_flags |= pic->pps->sps->direct_8x8_inference_flag << 0;
@@ -338,6 +439,11 @@ static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_
memcpy(result.scaling_list_4x4, pic->pps->ScalingList4x4, 6*16);
memcpy(result.scaling_list_8x8, pic->pps->ScalingList8x8, 2*64);
+ if (dec->stream_type == RUVD_CODEC_H264_PERF) {
+ memcpy(dec->it, result.scaling_list_4x4, 6*16);
+ memcpy((dec->it + 96), result.scaling_list_8x8, 2*64);
+ }
+
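/* Note: the copies above fill the IT buffer with six 4x4 scaling lists
 * (6*16 = 96 bytes) followed at offset 96 by two 8x8 lists (2*64 = 128
 * bytes), i.e. 224 bytes of the 992-byte IT region.
 */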
result.num_ref_frames = pic->num_ref_frames;
result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1;
@@ -354,6 +460,151 @@ static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_
return result;
}
+/* get h265 specific message bits */
+static struct ruvd_h265 get_h265_msg(struct ruvd_decoder *dec, struct pipe_video_buffer *target,
+ struct pipe_h265_picture_desc *pic)
+{
+ struct ruvd_h265 result;
+ unsigned i;
+
+ memset(&result, 0, sizeof(result));
+
+ result.sps_info_flags = 0;
+ result.sps_info_flags |= pic->pps->sps->scaling_list_enabled_flag << 0;
+ result.sps_info_flags |= pic->pps->sps->amp_enabled_flag << 1;
+ result.sps_info_flags |= pic->pps->sps->sample_adaptive_offset_enabled_flag << 2;
+ result.sps_info_flags |= pic->pps->sps->pcm_enabled_flag << 3;
+ result.sps_info_flags |= pic->pps->sps->pcm_loop_filter_disabled_flag << 4;
+ result.sps_info_flags |= pic->pps->sps->long_term_ref_pics_present_flag << 5;
+ result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6;
+ result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7;
+ result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8;
+ if (((struct r600_common_screen*)dec->screen)->family == CHIP_CARRIZO)
+ result.sps_info_flags |= 1 << 9;
+
+ result.chroma_format = pic->pps->sps->chroma_format_idc;
+ result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;
+ result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;
+ result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
+ result.sps_max_dec_pic_buffering_minus1 = pic->pps->sps->sps_max_dec_pic_buffering_minus1;
+ result.log2_min_luma_coding_block_size_minus3 = pic->pps->sps->log2_min_luma_coding_block_size_minus3;
+ result.log2_diff_max_min_luma_coding_block_size = pic->pps->sps->log2_diff_max_min_luma_coding_block_size;
+ result.log2_min_transform_block_size_minus2 = pic->pps->sps->log2_min_transform_block_size_minus2;
+ result.log2_diff_max_min_transform_block_size = pic->pps->sps->log2_diff_max_min_transform_block_size;
+ result.max_transform_hierarchy_depth_inter = pic->pps->sps->max_transform_hierarchy_depth_inter;
+ result.max_transform_hierarchy_depth_intra = pic->pps->sps->max_transform_hierarchy_depth_intra;
+ result.pcm_sample_bit_depth_luma_minus1 = pic->pps->sps->pcm_sample_bit_depth_luma_minus1;
+ result.pcm_sample_bit_depth_chroma_minus1 = pic->pps->sps->pcm_sample_bit_depth_chroma_minus1;
+ result.log2_min_pcm_luma_coding_block_size_minus3 = pic->pps->sps->log2_min_pcm_luma_coding_block_size_minus3;
+ result.log2_diff_max_min_pcm_luma_coding_block_size = pic->pps->sps->log2_diff_max_min_pcm_luma_coding_block_size;
+ result.num_short_term_ref_pic_sets = pic->pps->sps->num_short_term_ref_pic_sets;
+
+ result.pps_info_flags = 0;
+ result.pps_info_flags |= pic->pps->dependent_slice_segments_enabled_flag << 0;
+ result.pps_info_flags |= pic->pps->output_flag_present_flag << 1;
+ result.pps_info_flags |= pic->pps->sign_data_hiding_enabled_flag << 2;
+ result.pps_info_flags |= pic->pps->cabac_init_present_flag << 3;
+ result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 4;
+ result.pps_info_flags |= pic->pps->transform_skip_enabled_flag << 5;
+ result.pps_info_flags |= pic->pps->cu_qp_delta_enabled_flag << 6;
+ result.pps_info_flags |= pic->pps->pps_slice_chroma_qp_offsets_present_flag << 7;
+ result.pps_info_flags |= pic->pps->weighted_pred_flag << 8;
+ result.pps_info_flags |= pic->pps->weighted_bipred_flag << 9;
+ result.pps_info_flags |= pic->pps->transquant_bypass_enabled_flag << 10;
+ result.pps_info_flags |= pic->pps->tiles_enabled_flag << 11;
+ result.pps_info_flags |= pic->pps->entropy_coding_sync_enabled_flag << 12;
+ result.pps_info_flags |= pic->pps->uniform_spacing_flag << 13;
+ result.pps_info_flags |= pic->pps->loop_filter_across_tiles_enabled_flag << 14;
+ result.pps_info_flags |= pic->pps->pps_loop_filter_across_slices_enabled_flag << 15;
+ result.pps_info_flags |= pic->pps->deblocking_filter_override_enabled_flag << 16;
+ result.pps_info_flags |= pic->pps->pps_deblocking_filter_disabled_flag << 17;
+ result.pps_info_flags |= pic->pps->lists_modification_present_flag << 18;
+ result.pps_info_flags |= pic->pps->slice_segment_header_extension_present_flag << 19;
+ //result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag; ???
+
+ result.num_extra_slice_header_bits = pic->pps->num_extra_slice_header_bits;
+ result.num_long_term_ref_pic_sps = pic->pps->sps->num_long_term_ref_pics_sps;
+ result.num_ref_idx_l0_default_active_minus1 = pic->pps->num_ref_idx_l0_default_active_minus1;
+ result.num_ref_idx_l1_default_active_minus1 = pic->pps->num_ref_idx_l1_default_active_minus1;
+ result.pps_cb_qp_offset = pic->pps->pps_cb_qp_offset;
+ result.pps_cr_qp_offset = pic->pps->pps_cr_qp_offset;
+ result.pps_beta_offset_div2 = pic->pps->pps_beta_offset_div2;
+ result.pps_tc_offset_div2 = pic->pps->pps_tc_offset_div2;
+ result.diff_cu_qp_delta_depth = pic->pps->diff_cu_qp_delta_depth;
+ result.num_tile_columns_minus1 = pic->pps->num_tile_columns_minus1;
+ result.num_tile_rows_minus1 = pic->pps->num_tile_rows_minus1;
+ result.log2_parallel_merge_level_minus2 = pic->pps->log2_parallel_merge_level_minus2;
+ result.init_qp_minus26 = pic->pps->init_qp_minus26;
+
+ for (i = 0; i < 19; ++i)
+ result.column_width_minus1[i] = pic->pps->column_width_minus1[i];
+
+ for (i = 0; i < 21; ++i)
+ result.row_height_minus1[i] = pic->pps->row_height_minus1[i];
+
+ result.num_delta_pocs_ref_rps_idx = pic->NumDeltaPocsOfRefRpsIdx;
+ result.curr_idx = pic->CurrPicOrderCntVal;
+ result.curr_poc = pic->CurrPicOrderCntVal;
+
+ vl_video_buffer_set_associated_data(target, &dec->base,
+ (void *)(uintptr_t)pic->CurrPicOrderCntVal,
+ &ruvd_destroy_associated_data);
+
+ for (i = 0; i < 16; ++i) {
+ struct pipe_video_buffer *ref = pic->ref[i];
+ uintptr_t ref_pic = 0;
+
+ result.poc_list[i] = pic->PicOrderCntVal[i];
+
+ if (ref)
+ ref_pic = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base);
+ else
+ ref_pic = 0x7F;
+ result.ref_pic_list[i] = ref_pic;
+ }
+
+ for (i = 0; i < 8; ++i) {
+ result.ref_pic_set_st_curr_before[i] = 0xFF;
+ result.ref_pic_set_st_curr_after[i] = 0xFF;
+ result.ref_pic_set_lt_curr[i] = 0xFF;
+ }
+
+ for (i = 0; i < pic->NumPocStCurrBefore; ++i)
+ result.ref_pic_set_st_curr_before[i] = pic->RefPicSetStCurrBefore[i];
+
+ for (i = 0; i < pic->NumPocStCurrAfter; ++i)
+ result.ref_pic_set_st_curr_after[i] = pic->RefPicSetStCurrAfter[i];
+
+ for (i = 0; i < pic->NumPocLtCurr; ++i)
+ result.ref_pic_set_lt_curr[i] = pic->RefPicSetLtCurr[i];
+
+ for (i = 0; i < 6; ++i)
+ result.ucScalingListDCCoefSizeID2[i] = pic->pps->sps->ScalingListDCCoeff16x16[i];
+
+ for (i = 0; i < 2; ++i)
+ result.ucScalingListDCCoefSizeID3[i] = pic->pps->sps->ScalingListDCCoeff32x32[i];
+
+ memcpy(dec->it, pic->pps->sps->ScalingList4x4, 6 * 16);
+ memcpy(dec->it + 96, pic->pps->sps->ScalingList8x8, 6 * 64);
+ memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64);
+ memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64);
+
+ /* TODO
+ result.highestTid;
+ result.isNonRef;
+
+ IDRPicFlag;
+ RAPPicFlag;
+ NumPocTotalCurr;
+ NumShortTermPictureSliceHeaderBits;
+ NumLongTermPictureSliceHeaderBits;
+
+ IsLongTerm[16];
+ */
+
+ return result;
+}
+
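The memcpy() destinations at the end of get_h265_msg() follow directly from the scaling-list sizes, laying the lists out back to back in the IT buffer. A sketch of the implied layout with illustrative enum names; that IT_SCALING_TABLE_SIZE is at least 992 bytes is an assumption here, since the define itself is not visible in this hunk:

	enum {
		EXAMPLE_IT_OFF_4X4   = 0,   /* 6 lists * 16 bytes = 96,  ends at 96  */
		EXAMPLE_IT_OFF_8X8   = 96,  /* 6 lists * 64 bytes = 384, ends at 480 */
		EXAMPLE_IT_OFF_16X16 = 480, /* 6 lists * 64 bytes = 384, ends at 864 */
		EXAMPLE_IT_OFF_32X32 = 864, /* 2 lists * 64 bytes = 128, ends at 992 */
	};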
/* get vc1 specific message bits */
static struct ruvd_vc1 get_vc1_msg(struct pipe_vc1_picture_desc *pic)
{
@@ -556,7 +807,7 @@ static void ruvd_destroy(struct pipe_video_codec *decoder)
assert(decoder);
- map_msg_fb_buf(dec);
+ map_msg_fb_it_buf(dec);
memset(dec->msg, 0, sizeof(*dec->msg));
dec->msg->size = sizeof(*dec->msg);
dec->msg->msg_type = RUVD_MSG_DESTROY;
@@ -568,21 +819,17 @@ static void ruvd_destroy(struct pipe_video_codec *decoder)
dec->ws->cs_destroy(dec->cs);
for (i = 0; i < NUM_BUFFERS; ++i) {
- rvid_destroy_buffer(&dec->msg_fb_buffers[i]);
+ rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
rvid_destroy_buffer(&dec->bs_buffers[i]);
}
rvid_destroy_buffer(&dec->dpb);
+ if (u_reduce_video_profile(dec->base.profile) == PIPE_VIDEO_FORMAT_HEVC)
+ rvid_destroy_buffer(&dec->ctx);
FREE(dec);
}
-/* free associated data in the video buffer callback */
-static void ruvd_destroy_associated_data(void *data)
-{
- /* NOOP, since we only use an intptr */
-}
-
/**
* start decoding of a new frame
*/
@@ -670,7 +917,7 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
{
struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
struct radeon_winsys_cs_handle *dt;
- struct rvid_buffer *msg_fb_buf, *bs_buf;
+ struct rvid_buffer *msg_fb_it_buf, *bs_buf;
unsigned bs_size;
assert(decoder);
@@ -678,26 +925,27 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
if (!dec->bs_ptr)
return;
- msg_fb_buf = &dec->msg_fb_buffers[dec->cur_buffer];
+ msg_fb_it_buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
bs_buf = &dec->bs_buffers[dec->cur_buffer];
bs_size = align(dec->bs_size, 128);
memset(dec->bs_ptr, 0, bs_size - dec->bs_size);
dec->ws->buffer_unmap(bs_buf->res->cs_buf);
- map_msg_fb_buf(dec);
+ map_msg_fb_it_buf(dec);
dec->msg->size = sizeof(*dec->msg);
dec->msg->msg_type = RUVD_MSG_DECODE;
dec->msg->stream_handle = dec->stream_handle;
dec->msg->status_report_feedback_number = dec->frame_number;
- dec->msg->body.decode.stream_type = profile2stream_type(dec->base.profile);
+ dec->msg->body.decode.stream_type = dec->stream_type;
dec->msg->body.decode.decode_flags = 0x1;
dec->msg->body.decode.width_in_samples = dec->base.width;
dec->msg->body.decode.height_in_samples = dec->base.height;
dec->msg->body.decode.dpb_size = dec->dpb.res->buf->size;
dec->msg->body.decode.bsd_size = bs_size;
+ dec->msg->body.decode.db_pitch = dec->base.width;
dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target);
@@ -706,6 +954,10 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
dec->msg->body.decode.codec.h264 = get_h264_msg(dec, (struct pipe_h264_picture_desc*)picture);
break;
+ case PIPE_VIDEO_FORMAT_HEVC:
+ dec->msg->body.decode.codec.h265 = get_h265_msg(dec, target, (struct pipe_h265_picture_desc*)picture);
+ break;
+
case PIPE_VIDEO_FORMAT_VC1:
dec->msg->body.decode.codec.vc1 = get_vc1_msg((struct pipe_vc1_picture_desc*)picture);
break;
@@ -733,12 +985,19 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
send_cmd(dec, RUVD_CMD_DPB_BUFFER, dec->dpb.res->cs_buf, 0,
RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
+ if (u_reduce_video_profile(picture->profile) == PIPE_VIDEO_FORMAT_HEVC) {
+ send_cmd(dec, RUVD_CMD_CONTEXT_BUFFER, dec->ctx.res->cs_buf, 0,
+ RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
+ }
send_cmd(dec, RUVD_CMD_BITSTREAM_BUFFER, bs_buf->res->cs_buf,
0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
send_cmd(dec, RUVD_CMD_DECODING_TARGET_BUFFER, dt, 0,
RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);
- send_cmd(dec, RUVD_CMD_FEEDBACK_BUFFER, msg_fb_buf->res->cs_buf,
+ send_cmd(dec, RUVD_CMD_FEEDBACK_BUFFER, msg_fb_it_buf->res->cs_buf,
FB_BUFFER_OFFSET, RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT);
+ if (have_it(dec))
+ send_cmd(dec, RUVD_CMD_ITSCALING_TABLE_BUFFER, msg_fb_it_buf->res->cs_buf,
+ FB_BUFFER_OFFSET + FB_BUFFER_SIZE, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
set_reg(dec, RUVD_ENGINE_CNTL, 1);
flush(dec);
@@ -760,7 +1019,8 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
ruvd_set_dtb set_dtb)
{
struct radeon_winsys* ws = ((struct r600_common_context *)context)->ws;
- unsigned dpb_size = calc_dpb_size(templ);
+ struct r600_common_context *rctx = (struct r600_common_context*)context;
+ unsigned dpb_size;
unsigned width = templ->width, height = templ->height;
unsigned bs_buf_size;
struct radeon_info info;
@@ -791,6 +1051,9 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
if (!dec)
return NULL;
+ if (info.drm_major < 3)
+ dec->use_legacy = TRUE;
+
dec->base = *templ;
dec->base.context = context;
dec->base.width = width;
@@ -803,11 +1066,12 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
dec->base.end_frame = ruvd_end_frame;
dec->base.flush = ruvd_flush;
+ dec->stream_type = profile2stream_type(dec, info.family);
dec->set_dtb = set_dtb;
dec->stream_handle = rvid_alloc_stream_handle();
dec->screen = context->screen;
dec->ws = ws;
- dec->cs = ws->cs_create(ws, RING_UVD, NULL, NULL, NULL);
+ dec->cs = ws->cs_create(rctx->ctx, RING_UVD, NULL, NULL, NULL);
if (!dec->cs) {
RVID_ERR("Can't get command submission context.\n");
goto error;
@@ -815,10 +1079,12 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
bs_buf_size = width * height * 512 / (16 * 16);
for (i = 0; i < NUM_BUFFERS; ++i) {
- unsigned msg_fb_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE;
+ unsigned msg_fb_it_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE;
STATIC_ASSERT(sizeof(struct ruvd_msg) <= FB_BUFFER_OFFSET);
- if (!rvid_create_buffer(dec->screen, &dec->msg_fb_buffers[i],
- msg_fb_size, PIPE_USAGE_STAGING)) {
+ if (have_it(dec))
+ msg_fb_it_size += IT_SCALING_TABLE_SIZE;
+ if (!rvid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i],
+ msg_fb_it_size, PIPE_USAGE_STAGING)) {
RVID_ERR("Can't allocate message buffers.\n");
goto error;
}
@@ -829,10 +1095,12 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
goto error;
}
- rvid_clear_buffer(context, &dec->msg_fb_buffers[i]);
+ rvid_clear_buffer(context, &dec->msg_fb_it_buffers[i]);
rvid_clear_buffer(context, &dec->bs_buffers[i]);
}
+ dpb_size = calc_dpb_size(dec);
+
if (!rvid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {
RVID_ERR("Can't allocate dpb.\n");
goto error;
@@ -840,14 +1108,23 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
rvid_clear_buffer(context, &dec->dpb);
- map_msg_fb_buf(dec);
+ if (u_reduce_video_profile(dec->base.profile) == PIPE_VIDEO_FORMAT_HEVC) {
+ unsigned ctx_size = calc_ctx_size(dec);
+ if (!rvid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) {
+ RVID_ERR("Can't allocate context buffer.\n");
+ goto error;
+ }
+ rvid_clear_buffer(context, &dec->ctx);
+ }
+
+ map_msg_fb_it_buf(dec);
dec->msg->size = sizeof(*dec->msg);
dec->msg->msg_type = RUVD_MSG_CREATE;
dec->msg->stream_handle = dec->stream_handle;
- dec->msg->body.create.stream_type = profile2stream_type(dec->base.profile);
+ dec->msg->body.create.stream_type = dec->stream_type;
dec->msg->body.create.width_in_samples = dec->base.width;
dec->msg->body.create.height_in_samples = dec->base.height;
- dec->msg->body.create.dpb_size = dec->dpb.res->buf->size;
+ dec->msg->body.create.dpb_size = dpb_size;
send_msg_buf(dec);
flush(dec);
next_buffer(dec);
@@ -858,11 +1135,13 @@ error:
if (dec->cs) dec->ws->cs_destroy(dec->cs);
for (i = 0; i < NUM_BUFFERS; ++i) {
- rvid_destroy_buffer(&dec->msg_fb_buffers[i]);
+ rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
rvid_destroy_buffer(&dec->bs_buffers[i]);
}
rvid_destroy_buffer(&dec->dpb);
+ if (u_reduce_video_profile(dec->base.profile) == PIPE_VIDEO_FORMAT_HEVC)
+ rvid_destroy_buffer(&dec->ctx);
FREE(dec);
diff --git a/src/gallium/drivers/radeon/radeon_uvd.h b/src/gallium/drivers/radeon/radeon_uvd.h
index 7442865c9ec..452fbd60880 100644
--- a/src/gallium/drivers/radeon/radeon_uvd.h
+++ b/src/gallium/drivers/radeon/radeon_uvd.h
@@ -62,6 +62,8 @@
#define RUVD_CMD_DECODING_TARGET_BUFFER 0x00000002
#define RUVD_CMD_FEEDBACK_BUFFER 0x00000003
#define RUVD_CMD_BITSTREAM_BUFFER 0x00000100
+#define RUVD_CMD_ITSCALING_TABLE_BUFFER 0x00000204
+#define RUVD_CMD_CONTEXT_BUFFER 0x00000206
/* UVD message types */
#define RUVD_MSG_CREATE 0
@@ -73,6 +75,8 @@
#define RUVD_CODEC_VC1 0x00000001
#define RUVD_CODEC_MPEG2 0x00000003
#define RUVD_CODEC_MPEG4 0x00000004
+#define RUVD_CODEC_H264_PERF 0x00000007
+#define RUVD_CODEC_H265 0x00000010
/* UVD decode target buffer tiling mode */
#define RUVD_TILE_LINEAR 0x00000000
@@ -171,6 +175,66 @@ struct ruvd_h264 {
} mvc;
};
+struct ruvd_h265 {
+ uint32_t sps_info_flags;
+ uint32_t pps_info_flags;
+
+ uint8_t chroma_format;
+ uint8_t bit_depth_luma_minus8;
+ uint8_t bit_depth_chroma_minus8;
+ uint8_t log2_max_pic_order_cnt_lsb_minus4;
+
+ uint8_t sps_max_dec_pic_buffering_minus1;
+ uint8_t log2_min_luma_coding_block_size_minus3;
+ uint8_t log2_diff_max_min_luma_coding_block_size;
+ uint8_t log2_min_transform_block_size_minus2;
+
+ uint8_t log2_diff_max_min_transform_block_size;
+ uint8_t max_transform_hierarchy_depth_inter;
+ uint8_t max_transform_hierarchy_depth_intra;
+ uint8_t pcm_sample_bit_depth_luma_minus1;
+
+ uint8_t pcm_sample_bit_depth_chroma_minus1;
+ uint8_t log2_min_pcm_luma_coding_block_size_minus3;
+ uint8_t log2_diff_max_min_pcm_luma_coding_block_size;
+ uint8_t num_extra_slice_header_bits;
+
+ uint8_t num_short_term_ref_pic_sets;
+ uint8_t num_long_term_ref_pic_sps;
+ uint8_t num_ref_idx_l0_default_active_minus1;
+ uint8_t num_ref_idx_l1_default_active_minus1;
+
+ int8_t pps_cb_qp_offset;
+ int8_t pps_cr_qp_offset;
+ int8_t pps_beta_offset_div2;
+ int8_t pps_tc_offset_div2;
+
+ uint8_t diff_cu_qp_delta_depth;
+ uint8_t num_tile_columns_minus1;
+ uint8_t num_tile_rows_minus1;
+ uint8_t log2_parallel_merge_level_minus2;
+
+ uint16_t column_width_minus1[19];
+ uint16_t row_height_minus1[21];
+
+ int8_t init_qp_minus26;
+ uint8_t num_delta_pocs_ref_rps_idx;
+ uint8_t curr_idx;
+ uint8_t reserved1;
+ int32_t curr_poc;
+ uint8_t ref_pic_list[16];
+ int32_t poc_list[16];
+ uint8_t ref_pic_set_st_curr_before[8];
+ uint8_t ref_pic_set_st_curr_after[8];
+ uint8_t ref_pic_set_lt_curr[8];
+
+ uint8_t ucScalingListDCCoefSizeID2[6];
+ uint8_t ucScalingListDCCoefSizeID3[2];
+
+ uint8_t highestTid;
+ uint8_t isNonRef;
+};
+
struct ruvd_vc1 {
uint32_t profile;
uint32_t level;
@@ -327,6 +391,7 @@ struct ruvd_msg {
union {
struct ruvd_h264 h264;
+ struct ruvd_h265 h265;
struct ruvd_vc1 vc1;
struct ruvd_mpeg2 mpeg2;
struct ruvd_mpeg4 mpeg4;
diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c
index a6567379fe3..7eab974a3df 100644
--- a/src/gallium/drivers/radeon/radeon_vce.c
+++ b/src/gallium/drivers/radeon/radeon_vce.c
@@ -47,6 +47,8 @@
#define FW_40_2_2 ((40 << 24) | (2 << 16) | (2 << 8))
#define FW_50_0_1 ((50 << 24) | (0 << 16) | (1 << 8))
#define FW_50_1_2 ((50 << 24) | (1 << 16) | (2 << 8))
+#define FW_50_10_2 ((50 << 24) | (10 << 16) | (2 << 8))
+#define FW_50_17_3 ((50 << 24) | (17 << 16) | (3 << 8))
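The firmware version is packed one field per byte with the low byte unused, e.g. FW_50_17_3 = (50 << 24) | (17 << 16) | (3 << 8) = 0x32110300. A sketch of the reverse mapping, with illustrative names:

	static void example_unpack_fw(uint32_t fw,
	                              unsigned *maj, unsigned *min, unsigned *rev)
	{
		*maj = (fw >> 24) & 0xff; /* 50 */
		*min = (fw >> 16) & 0xff; /* 17 */
		*rev = (fw >> 8) & 0xff;  /*  3 */
	}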
/**
* flush commands to the hardware
@@ -54,6 +56,8 @@
static void flush(struct rvce_encoder *enc)
{
enc->ws->cs_flush(enc->cs, RADEON_FLUSH_ASYNC, NULL, 0);
+ enc->task_info_idx = 0;
+ enc->bs_idx = 0;
}
#if 0
@@ -214,7 +218,7 @@ struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc)
* Calculate the offsets into the CPB
*/
void rvce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
- unsigned *luma_offset, unsigned *chroma_offset)
+ signed *luma_offset, signed *chroma_offset)
{
unsigned pitch = align(enc->luma->level[0].pitch_bytes, 128);
unsigned vpitch = align(enc->luma->npix_y, 16);
@@ -278,24 +282,19 @@ static void rvce_begin_frame(struct pipe_video_codec *encoder,
enc->fb = &fb;
enc->session(enc);
enc->create(enc);
- enc->rate_control(enc);
- need_rate_control = false;
- enc->config_extension(enc);
- enc->motion_estimation(enc);
- enc->rdo(enc);
- if (enc->use_vui)
- enc->vui(enc);
- enc->pic_control(enc);
+ enc->config(enc);
enc->feedback(enc);
flush(enc);
//dump_feedback(enc, &fb);
rvid_destroy_buffer(&fb);
+ need_rate_control = false;
}
- enc->session(enc);
-
- if (need_rate_control)
- enc->rate_control(enc);
+ if (need_rate_control) {
+ enc->session(enc);
+ enc->config(enc);
+ flush(enc);
+ }
}
static void rvce_encode_bitstream(struct pipe_video_codec *encoder,
@@ -312,6 +311,8 @@ static void rvce_encode_bitstream(struct pipe_video_codec *encoder,
RVID_ERR("Can't create feedback buffer.\n");
return;
}
+ if (!enc->cs->cdw)
+ enc->session(enc);
enc->encode(enc);
enc->feedback(enc);
}
@@ -324,7 +325,8 @@ static void rvce_end_frame(struct pipe_video_codec *encoder,
struct rvce_cpb_slot *slot = LIST_ENTRY(
struct rvce_cpb_slot, enc->cpb_slots.prev, list);
- flush(enc);
+ if (!enc->dual_inst || enc->bs_idx > 1)
+ flush(enc);
/* update the CPB backtrack with the just encoded frame */
slot->picture_type = enc->pic.picture_type;
@@ -363,6 +365,9 @@ static void rvce_get_feedback(struct pipe_video_codec *encoder,
*/
static void rvce_flush(struct pipe_video_codec *encoder)
{
+ struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
+
+ flush(enc);
}
static void rvce_cs_flush(void *ctx, unsigned flags,
@@ -377,6 +382,7 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
rvce_get_buffer get_buffer)
{
struct r600_common_screen *rscreen = (struct r600_common_screen *)context->screen;
+ struct r600_common_context *rctx = (struct r600_common_context*)context;
struct rvce_encoder *enc;
struct pipe_video_buffer *tmp_buf, templat = {};
struct radeon_surf *tmp_surf;
@@ -395,8 +401,17 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
if (!enc)
return NULL;
+ if (rscreen->info.drm_major == 3)
+ enc->use_vm = true;
if ((rscreen->info.drm_major > 2) || (rscreen->info.drm_minor >= 42))
enc->use_vui = true;
+ if (rscreen->info.family >= CHIP_TONGA)
+ enc->dual_pipe = true;
+ /* TODO enable B frame with dual instance */
+ if ((rscreen->info.family >= CHIP_TONGA) &&
+ (templ->max_references == 1) &&
+ (rscreen->info.vce_harvest_config == 0))
+ enc->dual_inst = true;
enc->base = *templ;
enc->base.context = context;
@@ -411,7 +426,7 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
enc->screen = context->screen;
enc->ws = ws;
- enc->cs = ws->cs_create(ws, RING_VCE, rvce_cs_flush, enc, NULL);
+ enc->cs = ws->cs_create(rctx->ctx, RING_VCE, rvce_cs_flush, enc, NULL);
if (!enc->cs) {
RVID_ERR("Can't get command submission context.\n");
goto error;
@@ -436,6 +451,9 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
cpb_size = cpb_size * align(tmp_surf->npix_y, 16);
cpb_size = cpb_size * 3 / 2;
cpb_size = cpb_size * enc->cpb_num;
+ if (enc->dual_pipe)
+ cpb_size += RVCE_MAX_AUX_BUFFER_NUM *
+ RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2;
tmp_buf->destroy(tmp_buf);
if (!rvid_create_buffer(enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) {
RVID_ERR("Can't create CPB buffer.\n");
@@ -455,6 +473,8 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
case FW_50_0_1:
case FW_50_1_2:
+ case FW_50_10_2:
+ case FW_50_17_3:
radeon_vce_50_init(enc);
break;
@@ -482,5 +502,29 @@ bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen)
{
return rscreen->info.vce_fw_version == FW_40_2_2 ||
rscreen->info.vce_fw_version == FW_50_0_1 ||
- rscreen->info.vce_fw_version == FW_50_1_2;
+ rscreen->info.vce_fw_version == FW_50_1_2 ||
+ rscreen->info.vce_fw_version == FW_50_10_2 ||
+ rscreen->info.vce_fw_version == FW_50_17_3;
+}
+
+/**
+ * Add the buffer as relocation to the current command submission
+ */
+void rvce_add_buffer(struct rvce_encoder *enc, struct radeon_winsys_cs_handle *buf,
+ enum radeon_bo_usage usage, enum radeon_bo_domain domain,
+ signed offset)
+{
+ int reloc_idx;
+
+ reloc_idx = enc->ws->cs_add_reloc(enc->cs, buf, usage, domain, RADEON_PRIO_MIN);
+ if (enc->use_vm) {
+ uint64_t addr;
+ addr = enc->ws->buffer_get_virtual_address(buf);
+ addr = addr + offset;
+ RVCE_CS(addr >> 32);
+ RVCE_CS(addr);
+ } else {
+ RVCE_CS(reloc_idx * 4);
+ RVCE_CS(offset);
+ }
}
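Both branches emit exactly two dwords, so callers see one uniform AddressHi/AddressLo slot whether the kernel exposes GPU virtual addresses (use_vm, amdgpu) or relocation indices (legacy radeon). Through the updated macros, a typical use looks like the feedback command elsewhere in this patch:

	RVCE_BEGIN(0x05000005);                                       // feedback buffer
	RVCE_WRITE(enc->fb->res->cs_buf, enc->fb->res->domains, 0x0); // feedbackRingAddressHi/Lo
	RVCE_CS(0x00000001);                                          // feedbackRingSize
	RVCE_END();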
diff --git a/src/gallium/drivers/radeon/radeon_vce.h b/src/gallium/drivers/radeon/radeon_vce.h
index 8319ef48cd5..624bda479f8 100644
--- a/src/gallium/drivers/radeon/radeon_vce.h
+++ b/src/gallium/drivers/radeon/radeon_vce.h
@@ -36,15 +36,16 @@
#include "util/list.h"
-#define RVCE_RELOC(buf, usage, domain) (enc->ws->cs_add_reloc(enc->cs, (buf), (usage), domain, RADEON_PRIO_MIN))
-
#define RVCE_CS(value) (enc->cs->buf[enc->cs->cdw++] = (value))
#define RVCE_BEGIN(cmd) { uint32_t *begin = &enc->cs->buf[enc->cs->cdw++]; RVCE_CS(cmd)
-#define RVCE_READ(buf, domain) RVCE_CS(RVCE_RELOC(buf, RADEON_USAGE_READ, domain) * 4)
-#define RVCE_WRITE(buf, domain) RVCE_CS(RVCE_RELOC(buf, RADEON_USAGE_WRITE, domain) * 4)
-#define RVCE_READWRITE(buf, domain) RVCE_CS(RVCE_RELOC(buf, RADEON_USAGE_READWRITE, domain) * 4)
+#define RVCE_READ(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off))
+#define RVCE_WRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off))
+#define RVCE_READWRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off))
#define RVCE_END() *begin = (&enc->cs->buf[enc->cs->cdw] - begin) * 4; }
+#define RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE (4096 * 16 * 2.5)
+#define RVCE_MAX_AUX_BUFFER_NUM 4
+
struct r600_common_screen;
/* driver dependent callback */
@@ -76,8 +77,12 @@ struct rvce_encoder {
void (*motion_estimation)(struct rvce_encoder *enc);
void (*rdo)(struct rvce_encoder *enc);
void (*vui)(struct rvce_encoder *enc);
+ void (*config)(struct rvce_encoder *enc);
void (*encode)(struct rvce_encoder *enc);
void (*destroy)(struct rvce_encoder *enc);
+ void (*task_info)(struct rvce_encoder *enc, uint32_t op,
+ uint32_t dep, uint32_t fb_idx,
+ uint32_t ring_idx);
unsigned stream_handle;
@@ -101,7 +106,14 @@ struct rvce_encoder {
struct rvid_buffer *fb;
struct rvid_buffer cpb;
struct pipe_h264_enc_picture_desc pic;
- bool use_vui;
+
+ unsigned task_info_idx;
+ unsigned bs_idx;
+
+ bool use_vm;
+ bool use_vui;
+ bool dual_pipe;
+ bool dual_inst;
};
/* CPB handling functions */
@@ -109,7 +121,7 @@ struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc);
struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc);
struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc);
void rvce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
- unsigned *luma_offset, unsigned *chroma_offset);
+ signed *luma_offset, signed *chroma_offset);
struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
const struct pipe_video_codec *templat,
@@ -118,6 +130,10 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen);
+void rvce_add_buffer(struct rvce_encoder *enc, struct radeon_winsys_cs_handle *buf,
+ enum radeon_bo_usage usage, enum radeon_bo_domain domain,
+ signed offset);
+
/* init vce fw 40.2.2 specific callbacks */
void radeon_vce_40_2_2_init(struct rvce_encoder *enc);
diff --git a/src/gallium/drivers/radeon/radeon_vce_40_2_2.c b/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
index 51b17b5f6a8..e64fbc7afb0 100644
--- a/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
+++ b/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
@@ -53,30 +53,38 @@ static void session(struct rvce_encoder *enc)
RVCE_END();
}
-static void task_info(struct rvce_encoder *enc, uint32_t taskOperation)
+static void task_info(struct rvce_encoder *enc, uint32_t op,
+ uint32_t dep, uint32_t fb_idx, uint32_t ring_idx)
{
RVCE_BEGIN(0x00000002); // task info
+ if (op == 0x3) {
+ if (enc->task_info_idx) {
+ uint32_t offs = enc->cs->cdw - enc->task_info_idx + 3;
+ // Update offsetOfNextTaskInfo
+ enc->cs->buf[enc->task_info_idx] = offs;
+ }
+ enc->task_info_idx = enc->cs->cdw;
+ }
RVCE_CS(0xffffffff); // offsetOfNextTaskInfo
- RVCE_CS(taskOperation); // taskOperation
- RVCE_CS(0x00000000); // referencePictureDependency
+ RVCE_CS(op); // taskOperation
+ RVCE_CS(dep); // referencePictureDependency
RVCE_CS(0x00000000); // collocateFlagDependency
- RVCE_CS(0x00000000); // feedbackIndex
- RVCE_CS(0x00000000); // videoBitstreamRingIndex
+ RVCE_CS(fb_idx); // feedbackIndex
+ RVCE_CS(ring_idx); // videoBitstreamRingIndex
RVCE_END();
}
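For encode tasks (op 0x3) the function back-patches the previous task's offsetOfNextTaskInfo placeholder, chaining all tasks queued in one submission; flush() resets task_info_idx, so each new command stream starts a fresh chain. A simplified standalone sketch of the mechanism, omitting the RVCE_BEGIN header dwords and copying the +3 adjustment verbatim from the code above:

	static void example_chain_task_info(uint32_t *buf, unsigned *cdw,
	                                    unsigned *task_info_idx)
	{
		if (*task_info_idx)
			/* rewrite the previous placeholder with the offset of
			 * the task info being written now */
			buf[*task_info_idx] = *cdw - *task_info_idx + 3;
		*task_info_idx = *cdw;
		buf[(*cdw)++] = 0xffffffff; /* offsetOfNextTaskInfo, patched later */
	}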
static void feedback(struct rvce_encoder *enc)
{
RVCE_BEGIN(0x05000005); // feedback buffer
- RVCE_WRITE(enc->fb->res->cs_buf, enc->fb->res->domains); // feedbackRingAddressHi
- RVCE_CS(0x00000000); // feedbackRingAddressLo
+ RVCE_WRITE(enc->fb->res->cs_buf, enc->fb->res->domains, 0x0); // feedbackRingAddressHi/Lo
RVCE_CS(0x00000001); // feedbackRingSize
RVCE_END();
}
static void create(struct rvce_encoder *enc)
{
- task_info(enc, 0x00000000);
+ enc->task_info(enc, 0x00000000, 0, 0, 0);
RVCE_BEGIN(0x01000001); // create cmd
RVCE_CS(0x00000000); // encUseCircularBuffer
@@ -272,21 +280,31 @@ static void vui(struct rvce_encoder *enc)
RVCE_END();
}
+static void config(struct rvce_encoder *enc)
+{
+ enc->task_info(enc, 0x00000002, 0, 0xffffffff, 0);
+ enc->rate_control(enc);
+ enc->config_extension(enc);
+ enc->motion_estimation(enc);
+ enc->rdo(enc);
+ if (enc->use_vui)
+ enc->vui(enc);
+ enc->pic_control(enc);
+}
+
static void encode(struct rvce_encoder *enc)
{
+ signed luma_offset, chroma_offset;
int i;
- unsigned luma_offset, chroma_offset;
- task_info(enc, 0x00000003);
+ enc->task_info(enc, 0x00000003, 0, 0, 0);
RVCE_BEGIN(0x05000001); // context buffer
- RVCE_READWRITE(enc->cpb.res->cs_buf, enc->cpb.res->domains); // encodeContextAddressHi
- RVCE_CS(0x00000000); // encodeContextAddressLo
+ RVCE_READWRITE(enc->cpb.res->cs_buf, enc->cpb.res->domains, 0x0); // encodeContextAddressHi/Lo
RVCE_END();
RVCE_BEGIN(0x05000004); // video bitstream buffer
- RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT); // videoBitstreamRingAddressHi
- RVCE_CS(0x00000000); // videoBitstreamRingAddressLo
+ RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT, 0x0); // videoBitstreamRingAddressHi/Lo
RVCE_CS(enc->bs_size); // videoBitstreamRingSize
RVCE_END();
@@ -298,10 +316,10 @@ static void encode(struct rvce_encoder *enc)
RVCE_CS(0x00000000); // insertAUD
RVCE_CS(0x00000000); // endOfSequence
RVCE_CS(0x00000000); // endOfStream
- RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM); // inputPictureLumaAddressHi
- RVCE_CS(enc->luma->level[0].offset); // inputPictureLumaAddressLo
- RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM); // inputPictureChromaAddressHi
- RVCE_CS(enc->chroma->level[0].offset); // inputPictureChromaAddressLo
+ RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
+ enc->luma->level[0].offset); // inputPictureLumaAddressHi/Lo
+ RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
+ enc->chroma->level[0].offset); // inputPictureChromaAddressHi/Lo
RVCE_CS(align(enc->luma->npix_y, 16)); // encInputFrameYPitch
RVCE_CS(enc->luma->level[0].pitch_bytes); // encInputPicLumaPitch
RVCE_CS(enc->chroma->level[0].pitch_bytes); // encInputPicChromaPitch
@@ -404,7 +422,7 @@ static void encode(struct rvce_encoder *enc)
static void destroy(struct rvce_encoder *enc)
{
- task_info(enc, 0x00000001);
+ enc->task_info(enc, 0x00000001, 0, 0, 0);
RVCE_BEGIN(0x02000001); // destroy
RVCE_END();
@@ -413,6 +431,7 @@ static void destroy(struct rvce_encoder *enc)
void radeon_vce_40_2_2_init(struct rvce_encoder *enc)
{
enc->session = session;
+ enc->task_info = task_info;
enc->create = create;
enc->feedback = feedback;
enc->rate_control = rate_control;
@@ -421,6 +440,7 @@ void radeon_vce_40_2_2_init(struct rvce_encoder *enc)
enc->motion_estimation = motion_estimation;
enc->rdo = rdo;
enc->vui = vui;
+ enc->config = config;
enc->encode = encode;
enc->destroy = destroy;
}
diff --git a/src/gallium/drivers/radeon/radeon_vce_50.c b/src/gallium/drivers/radeon/radeon_vce_50.c
index 84a2bfb117e..afdab18c0d3 100644
--- a/src/gallium/drivers/radeon/radeon_vce_50.c
+++ b/src/gallium/drivers/radeon/radeon_vce_50.c
@@ -44,18 +44,6 @@
#include "radeon_video.h"
#include "radeon_vce.h"
-static void task_info(struct rvce_encoder *enc, uint32_t taskOperation)
-{
- RVCE_BEGIN(0x00000002); // task info
- RVCE_CS(0xffffffff); // offsetOfNextTaskInfo
- RVCE_CS(taskOperation); // taskOperation
- RVCE_CS(0x00000000); // referencePictureDependency
- RVCE_CS(0x00000000); // collocateFlagDependency
- RVCE_CS(0x00000000); // feedbackIndex
- RVCE_CS(0x00000000); // videoBitstreamRingIndex
- RVCE_END();
-}
-
static void rate_control(struct rvce_encoder *enc)
{
RVCE_BEGIN(0x04000005); // rate control
@@ -90,22 +78,46 @@ static void rate_control(struct rvce_encoder *enc)
static void encode(struct rvce_encoder *enc)
{
+ signed luma_offset, chroma_offset, bs_offset;
+ unsigned dep, bs_idx = enc->bs_idx++;
int i;
- unsigned luma_offset, chroma_offset;
- task_info(enc, 0x00000003);
+ if (enc->dual_inst) {
+ if (bs_idx == 0)
+ dep = 1;
+ else if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR)
+ dep = 0;
+ else
+ dep = 2;
+ } else
+ dep = 0;
+
+ enc->task_info(enc, 0x00000003, dep, 0, bs_idx);
RVCE_BEGIN(0x05000001); // context buffer
- RVCE_READWRITE(enc->cpb.res->cs_buf, enc->cpb.res->domains); // encodeContextAddressHi
- RVCE_CS(0x00000000); // encodeContextAddressLo
+ RVCE_READWRITE(enc->cpb.res->cs_buf, enc->cpb.res->domains, 0); // encodeContextAddressHi/Lo
RVCE_END();
+ bs_offset = -(signed)(bs_idx * enc->bs_size);
+
RVCE_BEGIN(0x05000004); // video bitstream buffer
- RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT); // videoBitstreamRingAddressHi
- RVCE_CS(0x00000000); // videoBitstreamRingAddressLo
+ RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT, bs_offset); // videoBitstreamRingAddressHi/Lo
RVCE_CS(enc->bs_size); // videoBitstreamRingSize
RVCE_END();
+ if (enc->dual_pipe) {
+ unsigned aux_offset = enc->cpb.res->buf->size -
+ RVCE_MAX_AUX_BUFFER_NUM * RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2;
+ RVCE_BEGIN(0x05000002); // auxiliary buffer
+ for (i = 0; i < 8; ++i) {
+ RVCE_CS(aux_offset);
+ aux_offset += RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE;
+ }
+ for (i = 0; i < 8; ++i)
+ RVCE_CS(RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE);
+ RVCE_END();
+ }
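The auxiliary region consumed here is exactly the tail that rvce_create_encoder() adds to the CPB when dual_pipe is set: RVCE_MAX_AUX_BUFFER_NUM * 2 = 8 rows of RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE bytes each. Spelled out, as an illustrative helper:

	static unsigned example_aux_region_bytes(void)
	{
		/* 4096 * 16 * 2.5 = 163840 bytes per row, 8 rows = 1310720 bytes */
		return RVCE_MAX_AUX_BUFFER_NUM *
		       (unsigned)RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2;
	}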
+
RVCE_BEGIN(0x03000001); // encode
RVCE_CS(enc->pic.frame_num ? 0x0 : 0x11); // insertHeaders
RVCE_CS(0x00000000); // pictureStructure
@@ -114,14 +126,17 @@ static void encode(struct rvce_encoder *enc)
RVCE_CS(0x00000000); // insertAUD
RVCE_CS(0x00000000); // endOfSequence
RVCE_CS(0x00000000); // endOfStream
- RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM); // inputPictureLumaAddressHi
- RVCE_CS(enc->luma->level[0].offset); // inputPictureLumaAddressLo
- RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM); // inputPictureChromaAddressHi
- RVCE_CS(enc->chroma->level[0].offset); // inputPictureChromaAddressLo
+ RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
+ enc->luma->level[0].offset); // inputPictureLumaAddressHi/Lo
+ RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
+ enc->chroma->level[0].offset); // inputPictureChromaAddressHi/Lo
RVCE_CS(align(enc->luma->npix_y, 16)); // encInputFrameYPitch
RVCE_CS(enc->luma->level[0].pitch_bytes); // encInputPicLumaPitch
RVCE_CS(enc->chroma->level[0].pitch_bytes); // encInputPicChromaPitch
- RVCE_CS(0x00010000); // encInputPic(Addr|Array)Mode,encDisable(TwoPipeMode|MBOffloading)
+ if (enc->dual_pipe)
+ RVCE_CS(0x00000000); // encInputPic(Addr|Array)Mode,encDisable(TwoPipeMode|MBOffloading)
+ else
+ RVCE_CS(0x00010000); // encInputPic(Addr|Array)Mode,encDisable(TwoPipeMode|MBOffloading)
RVCE_CS(0x00000000); // encInputPicTileConfig
RVCE_CS(enc->pic.picture_type); // encPicType
RVCE_CS(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR); // encIdrFlag
diff --git a/src/gallium/drivers/radeon/radeon_video.c b/src/gallium/drivers/radeon/radeon_video.c
index 826e0763c08..3a1834b948f 100644
--- a/src/gallium/drivers/radeon/radeon_video.c
+++ b/src/gallium/drivers/radeon/radeon_video.c
@@ -214,9 +214,9 @@ int rvid_get_video_param(struct pipe_screen *screen,
case PIPE_VIDEO_CAP_NPOT_TEXTURES:
return 1;
case PIPE_VIDEO_CAP_MAX_WIDTH:
- return 2048;
+ return (rscreen->family < CHIP_TONGA) ? 2048 : 4096;
case PIPE_VIDEO_CAP_MAX_HEIGHT:
- return 1152;
+ return (rscreen->family < CHIP_TONGA) ? 1152 : 2304;
case PIPE_VIDEO_CAP_PREFERED_FORMAT:
return PIPE_FORMAT_NV12;
case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
@@ -225,6 +225,8 @@ int rvid_get_video_param(struct pipe_screen *screen,
return false;
case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
return true;
+ case PIPE_VIDEO_CAP_STACKED_FRAMES:
+ return (rscreen->family < CHIP_TONGA) ? 1 : 2;
default:
return 0;
}
@@ -262,20 +264,28 @@ int rvid_get_video_param(struct pipe_screen *screen,
/* FIXME: VC-1 simple/main profile is broken */
return profile == PIPE_VIDEO_PROFILE_VC1_ADVANCED &&
entrypoint != PIPE_VIDEO_ENTRYPOINT_ENCODE;
+ case PIPE_VIDEO_FORMAT_HEVC:
+ /* Carrizo only supports HEVC Main */
+ return rscreen->family >= CHIP_CARRIZO &&
+ profile == PIPE_VIDEO_PROFILE_HEVC_MAIN;
default:
return false;
}
case PIPE_VIDEO_CAP_NPOT_TEXTURES:
return 1;
case PIPE_VIDEO_CAP_MAX_WIDTH:
- return 2048;
+ return (rscreen->family < CHIP_TONGA) ? 2048 : 4096;
case PIPE_VIDEO_CAP_MAX_HEIGHT:
- return 1152;
+ return (rscreen->family < CHIP_TONGA) ? 1152 : 2304;
case PIPE_VIDEO_CAP_PREFERED_FORMAT:
return PIPE_FORMAT_NV12;
case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
+ if (u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_HEVC)
+ return false; // The hardware doesn't support interlaced HEVC.
return true;
case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
+ if (u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_HEVC)
+ return false; // The hardware doesn't support interlaced HEVC.
return true;
case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
return true;
@@ -300,6 +310,8 @@ int rvid_get_video_param(struct pipe_screen *screen,
case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
return 41;
+ case PIPE_VIDEO_PROFILE_HEVC_MAIN:
+ return 186;
default:
return 0;
}
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index 3bfbb6d75b7..7ab6e56e099 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -42,12 +42,9 @@
#include "pipebuffer/pb_buffer.h"
-#define RADEON_MAX_CMDBUF_DWORDS (16 * 1024)
-
#define RADEON_FLUSH_ASYNC (1 << 0)
#define RADEON_FLUSH_KEEP_TILING_FLAGS (1 << 1) /* needs DRM 2.12.0 */
-#define RADEON_FLUSH_COMPUTE (1 << 2)
-#define RADEON_FLUSH_END_OF_FRAME (1 << 3)
+#define RADEON_FLUSH_END_OF_FRAME (1 << 2)
/* Tiling flags. */
enum radeon_bo_layout {
@@ -136,6 +133,10 @@ enum radeon_family {
CHIP_KABINI,
CHIP_HAWAII,
CHIP_MULLINS,
+ CHIP_TONGA,
+ CHIP_ICELAND,
+ CHIP_CARRIZO,
+ CHIP_FIJI,
CHIP_LAST,
};
@@ -150,10 +151,12 @@ enum chip_class {
CAYMAN,
SI,
CIK,
+ VI,
};
enum ring_type {
RING_GFX = 0,
+ RING_COMPUTE,
RING_DMA,
RING_UVD,
RING_VCE,
@@ -169,9 +172,10 @@ enum radeon_value_id {
RADEON_NUM_BYTES_MOVED,
RADEON_VRAM_USAGE,
RADEON_GTT_USAGE,
- RADEON_GPU_TEMPERATURE,
+ RADEON_GPU_TEMPERATURE, /* DRM 2.42.0 */
RADEON_CURRENT_SCLK,
- RADEON_CURRENT_MCLK
+ RADEON_CURRENT_MCLK,
+ RADEON_GPU_RESET_COUNTER, /* DRM 2.43.0 */
};
enum radeon_bo_priority {
@@ -192,9 +196,11 @@ enum radeon_bo_priority {
struct winsys_handle;
struct radeon_winsys_cs_handle;
+struct radeon_winsys_ctx;
struct radeon_winsys_cs {
unsigned cdw; /* Number of used dwords. */
+ unsigned max_dw; /* Maximum number of dwords. */
uint32_t *buf; /* The command buffer. */
enum ring_type ring_type;
};
@@ -238,6 +244,7 @@ struct radeon_info {
boolean cik_macrotile_mode_array_valid;
uint32_t cik_macrotile_mode_array[16];
+ uint32_t vce_harvest_config;
};
enum radeon_feature_id {
@@ -317,6 +324,8 @@ struct radeon_surf {
struct radeon_surf_level stencil_level[RADEON_SURF_MAX_LEVEL];
uint32_t tiling_index[RADEON_SURF_MAX_LEVEL];
uint32_t stencil_tiling_index[RADEON_SURF_MAX_LEVEL];
+ uint32_t pipe_config;
+ uint32_t num_banks;
};
struct radeon_winsys {
@@ -398,24 +407,15 @@ struct radeon_winsys {
void (*buffer_unmap)(struct radeon_winsys_cs_handle *buf);
/**
- * Return TRUE if a buffer object is being used by the GPU.
- *
- * \param buf A winsys buffer object.
- * \param usage Only check whether the buffer is busy for the given usage.
- */
- boolean (*buffer_is_busy)(struct pb_buffer *buf,
- enum radeon_bo_usage usage);
-
- /**
- * Wait for a buffer object until it is not used by a GPU. This is
- * equivalent to a fence placed after the last command using the buffer,
- * and synchronizing to the fence.
+ * Wait for the buffer and return true if the buffer is not used
+ * by the device.
*
- * \param buf A winsys buffer object to wait for.
- * \param usage Only wait until the buffer is idle for the given usage,
- * but may still be busy for some other usage.
+ * The timeout of 0 will only return the status.
+ * The timeout of PIPE_TIMEOUT_INFINITE will always wait until the buffer
+ * is idle.
*/
- void (*buffer_wait)(struct pb_buffer *buf, enum radeon_bo_usage usage);
+ bool (*buffer_wait)(struct pb_buffer *buf, uint64_t timeout,
+ enum radeon_bo_usage usage);
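A usage sketch for the merged interface: a timeout of 0 replaces the removed buffer_is_busy() poll, while PIPE_TIMEOUT_INFINITE replaces the old blocking buffer_wait():

	/* poll: returns false immediately if the GPU still uses the buffer */
	if (!ws->buffer_wait(buf, 0, RADEON_USAGE_READWRITE)) {
		/* block until the buffer is idle */
		ws->buffer_wait(buf, PIPE_TIMEOUT_INFINITE, RADEON_USAGE_READWRITE);
	}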
/**
* Return tiling flags describing a memory layout of a buffer object.
@@ -450,10 +450,11 @@ struct radeon_winsys {
struct radeon_winsys_cs *rcs,
enum radeon_bo_layout microtile,
enum radeon_bo_layout macrotile,
+ unsigned pipe_config,
unsigned bankw, unsigned bankh,
unsigned tile_split,
unsigned stencil_tile_split,
- unsigned mtilea,
+ unsigned mtilea, unsigned num_banks,
unsigned stride,
bool scanout);
@@ -515,15 +516,31 @@ struct radeon_winsys {
*************************************************************************/
/**
+ * Create a command submission context.
+ * Various command streams can be submitted to the same context.
+ */
+ struct radeon_winsys_ctx *(*ctx_create)(struct radeon_winsys *ws);
+
+ /**
+ * Destroy a context.
+ */
+ void (*ctx_destroy)(struct radeon_winsys_ctx *ctx);
+
+ /**
+ * Query a GPU reset status.
+ */
+ enum pipe_reset_status (*ctx_query_reset_status)(struct radeon_winsys_ctx *ctx);
+
+ /**
* Create a command stream.
*
- * \param ws The winsys this function is called from.
+ * \param ctx The submission context
* \param ring_type The ring type (GFX, DMA, UVD)
* \param flush Flush callback function associated with the command stream.
* \param user User pointer that will be passed to the flush callback.
* \param trace_buf Trace buffer when tracing is enabled
*/
- struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys *ws,
+ struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys_ctx *ctx,
enum ring_type ring_type,
void (*flush)(void *ctx, unsigned flags,
struct pipe_fence_handle **fence),
@@ -668,12 +685,12 @@ struct radeon_winsys {
};
-static INLINE void radeon_emit(struct radeon_winsys_cs *cs, uint32_t value)
+static inline void radeon_emit(struct radeon_winsys_cs *cs, uint32_t value)
{
cs->buf[cs->cdw++] = value;
}
-static INLINE void radeon_emit_array(struct radeon_winsys_cs *cs,
+static inline void radeon_emit_array(struct radeon_winsys_cs *cs,
const uint32_t *values, unsigned count)
{
memcpy(cs->buf+cs->cdw, values, count * 4);
diff --git a/src/gallium/drivers/radeonsi/Automake.inc b/src/gallium/drivers/radeonsi/Automake.inc
index 8686fffd71c..5a9dcfd9fd6 100644
--- a/src/gallium/drivers/radeonsi/Automake.inc
+++ b/src/gallium/drivers/radeonsi/Automake.inc
@@ -5,10 +5,12 @@ TARGET_CPPFLAGS += -DGALLIUM_RADEONSI
TARGET_LIB_DEPS += \
$(top_builddir)/src/gallium/drivers/radeonsi/libradeonsi.la \
$(RADEON_LIBS) \
- $(LIBDRM_LIBS)
+ $(LIBDRM_LIBS) \
+ $(AMDGPU_LIBS)
TARGET_RADEON_WINSYS = \
- $(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la
+ $(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la \
+ $(top_builddir)/src/gallium/winsys/amdgpu/drm/libamdgpuwinsys.la
TARGET_RADEON_COMMON = \
$(top_builddir)/src/gallium/drivers/radeon/libradeon.la
diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources
index 2876c0ae735..a0b1414f4bb 100644
--- a/src/gallium/drivers/radeonsi/Makefile.sources
+++ b/src/gallium/drivers/radeonsi/Makefile.sources
@@ -3,6 +3,7 @@ C_SOURCES := \
si_blit.c \
si_commands.c \
si_compute.c \
+ si_cp_dma.c \
si_descriptors.c \
sid.h \
si_dma.c \
diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c
index 86111cb86e8..47b586f171e 100644
--- a/src/gallium/drivers/radeonsi/cik_sdma.c
+++ b/src/gallium/drivers/radeonsi/cik_sdma.c
@@ -27,7 +27,7 @@
#include "sid.h"
#include "si_pipe.h"
-#include "../radeon/r600_cs.h"
+#include "radeon/r600_cs.h"
#include "util/u_format.h"
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 1f2c4082dbc..48972bd170c 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -57,17 +57,19 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
util_blitter_save_rasterizer(sctx->blitter, sctx->queued.named.rasterizer);
util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader);
util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader);
+ util_blitter_save_tessctrl_shader(sctx->blitter, sctx->tcs_shader);
+ util_blitter_save_tesseval_shader(sctx->blitter, sctx->tes_shader);
util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader);
util_blitter_save_vertex_elements(sctx->blitter, sctx->vertex_elements);
if (sctx->queued.named.sample_mask) {
util_blitter_save_sample_mask(sctx->blitter,
sctx->queued.named.sample_mask->sample_mask);
}
- if (sctx->queued.named.viewport) {
- util_blitter_save_viewport(sctx->blitter, &sctx->queued.named.viewport->viewport);
+ if (sctx->queued.named.viewport[0]) {
+ util_blitter_save_viewport(sctx->blitter, &sctx->queued.named.viewport[0]->viewport);
}
- if (sctx->queued.named.scissor) {
- util_blitter_save_scissor(sctx->blitter, &sctx->queued.named.scissor->scissor);
+ if (sctx->queued.named.scissor[0]) {
+ util_blitter_save_scissor(sctx->blitter, &sctx->queued.named.scissor[0]->scissor);
}
util_blitter_save_vertex_buffer_slot(sctx->blitter, sctx->vertex_buffer);
util_blitter_save_so_targets(sctx->blitter, sctx->b.streamout.num_targets,
@@ -146,7 +148,7 @@ static void si_blit_decompress_depth(struct pipe_context *ctx,
struct pipe_surface *zsurf, *cbsurf, surf_tmpl;
sctx->dbcb_copy_sample = sample;
- sctx->db_render_state.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
surf_tmpl.format = texture->resource.b.b.format;
surf_tmpl.u.tex.level = level;
@@ -180,7 +182,7 @@ static void si_blit_decompress_depth(struct pipe_context *ctx,
sctx->dbcb_depth_copy_enabled = false;
sctx->dbcb_stencil_copy_enabled = false;
- sctx->db_render_state.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
static void si_blit_decompress_depth_in_place(struct si_context *sctx,
@@ -192,7 +194,7 @@ static void si_blit_decompress_depth_in_place(struct si_context *sctx,
unsigned layer, max_layer, checked_last_layer, level;
sctx->db_inplace_flush_enabled = true;
- sctx->db_render_state.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
surf_tmpl.format = texture->resource.b.b.format;
@@ -230,7 +232,7 @@ static void si_blit_decompress_depth_in_place(struct si_context *sctx,
}
sctx->db_inplace_flush_enabled = false;
- sctx->db_render_state.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
void si_flush_depth_textures(struct si_context *sctx,
@@ -340,6 +342,8 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
if (buffers & PIPE_CLEAR_COLOR) {
evergreen_do_fast_color_clear(&sctx->b, fb, &sctx->framebuffer.atom,
&buffers, color);
+ if (!buffers)
+ return; /* all buffers have been fast cleared */
}
if (buffers & PIPE_CLEAR_COLOR) {
@@ -374,9 +378,9 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
}
zstex->depth_clear_value = depth;
- sctx->framebuffer.atom.dirty = true; /* updates DB_DEPTH_CLEAR */
+ si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_DEPTH_CLEAR */
sctx->db_depth_clear = true;
- sctx->db_render_state.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
si_blitter_begin(ctx, SI_CLEAR);
@@ -389,7 +393,7 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
sctx->db_depth_clear = false;
sctx->db_depth_disable_expclear = false;
zstex->depth_cleared = true;
- sctx->db_render_state.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
}
@@ -455,89 +459,6 @@ struct texture_orig_info {
unsigned npix0_y;
};
-static void si_compressed_to_blittable(struct pipe_resource *tex,
- unsigned level,
- struct texture_orig_info *orig)
-{
- struct r600_texture *rtex = (struct r600_texture*)tex;
- unsigned pixsize = util_format_get_blocksize(rtex->resource.b.b.format);
- int new_format;
- int new_height, new_width;
-
- orig->format = tex->format;
- orig->width0 = tex->width0;
- orig->height0 = tex->height0;
- orig->npix0_x = rtex->surface.level[0].npix_x;
- orig->npix0_y = rtex->surface.level[0].npix_y;
- orig->npix_x = rtex->surface.level[level].npix_x;
- orig->npix_y = rtex->surface.level[level].npix_y;
-
- if (pixsize == 8)
- new_format = PIPE_FORMAT_R16G16B16A16_UINT; /* 64-bit block */
- else
- new_format = PIPE_FORMAT_R32G32B32A32_UINT; /* 128-bit block */
-
- new_width = util_format_get_nblocksx(tex->format, orig->width0);
- new_height = util_format_get_nblocksy(tex->format, orig->height0);
-
- tex->width0 = new_width;
- tex->height0 = new_height;
- tex->format = new_format;
- rtex->surface.level[0].npix_x = util_format_get_nblocksx(orig->format, orig->npix0_x);
- rtex->surface.level[0].npix_y = util_format_get_nblocksy(orig->format, orig->npix0_y);
- rtex->surface.level[level].npix_x = util_format_get_nblocksx(orig->format, orig->npix_x);
- rtex->surface.level[level].npix_y = util_format_get_nblocksy(orig->format, orig->npix_y);
-
- /* By dividing the dimensions by 4, we effectively decrement
- * last_level by 2, therefore the last 2 mipmap levels disappear and
- * aren't blittable. Note that the last 3 mipmap levels (4x4, 2x2,
- * 1x1) have equal slice sizes, which is an important assumption
- * for this to work.
- *
- * In order to make the last 2 mipmap levels blittable, we have to
- * add the slice size of the last mipmap level to the texture
- * address, so that even though the hw thinks it reads last_level-2,
- * it will actually read last_level-1, and if we add the slice size*2,
- * it will read last_level. That's how this workaround works.
- */
- if (level > rtex->resource.b.b.last_level-2)
- rtex->mipmap_shift = level - (rtex->resource.b.b.last_level-2);
-}
-
-static void si_change_format(struct pipe_resource *tex,
- unsigned level,
- struct texture_orig_info *orig,
- enum pipe_format format)
-{
- struct r600_texture *rtex = (struct r600_texture*)tex;
-
- orig->format = tex->format;
- orig->width0 = tex->width0;
- orig->height0 = tex->height0;
- orig->npix0_x = rtex->surface.level[0].npix_x;
- orig->npix0_y = rtex->surface.level[0].npix_y;
- orig->npix_x = rtex->surface.level[level].npix_x;
- orig->npix_y = rtex->surface.level[level].npix_y;
-
- tex->format = format;
-}
-
-static void si_reset_blittable_to_orig(struct pipe_resource *tex,
- unsigned level,
- struct texture_orig_info *orig)
-{
- struct r600_texture *rtex = (struct r600_texture*)tex;
-
- tex->format = orig->format;
- tex->width0 = orig->width0;
- tex->height0 = orig->height0;
- rtex->surface.level[0].npix_x = orig->npix0_x;
- rtex->surface.level[0].npix_y = orig->npix0_y;
- rtex->surface.level[level].npix_x = orig->npix_x;
- rtex->surface.level[level].npix_y = orig->npix_y;
- rtex->mipmap_shift = 0;
-}
-
void si_resource_copy_region(struct pipe_context *ctx,
struct pipe_resource *dst,
unsigned dst_level,
@@ -547,114 +468,116 @@ void si_resource_copy_region(struct pipe_context *ctx,
const struct pipe_box *src_box)
{
struct si_context *sctx = (struct si_context *)ctx;
- struct r600_texture *rdst = (struct r600_texture*)dst;
struct pipe_surface *dst_view, dst_templ;
struct pipe_sampler_view src_templ, *src_view;
- struct texture_orig_info orig_info[2];
+ unsigned dst_width, dst_height, src_width0, src_height0;
+ unsigned src_force_level = 0;
struct pipe_box sbox, dstbox;
- boolean restore_orig[2];
- /* Fallback for buffers. */
+ /* Handle buffers first. */
if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width, false);
return;
}
- memset(orig_info, 0, sizeof(orig_info));
+ assert(u_max_sample(dst) == u_max_sample(src));
/* The driver doesn't decompress resources automatically while
* u_blitter is rendering. */
si_decompress_subresource(ctx, src, src_level,
src_box->z, src_box->z + src_box->depth - 1);
- restore_orig[0] = restore_orig[1] = FALSE;
+ dst_width = u_minify(dst->width0, dst_level);
+ dst_height = u_minify(dst->height0, dst_level);
+ src_width0 = src->width0;
+ src_height0 = src->height0;
+
+ util_blitter_default_dst_texture(&dst_templ, dst, dst_level, dstz);
+ util_blitter_default_src_texture(&src_templ, src, src_level);
if (util_format_is_compressed(src->format) &&
util_format_is_compressed(dst->format)) {
- si_compressed_to_blittable(src, src_level, &orig_info[0]);
- restore_orig[0] = TRUE;
- sbox.x = util_format_get_nblocksx(orig_info[0].format, src_box->x);
- sbox.y = util_format_get_nblocksy(orig_info[0].format, src_box->y);
+ unsigned blocksize = util_format_get_blocksize(src->format);
+
+ if (blocksize == 8)
+ src_templ.format = PIPE_FORMAT_R16G16B16A16_UINT; /* 64-bit block */
+ else
+ src_templ.format = PIPE_FORMAT_R32G32B32A32_UINT; /* 128-bit block */
+ dst_templ.format = src_templ.format;
+
+ dst_width = util_format_get_nblocksx(dst->format, dst_width);
+ dst_height = util_format_get_nblocksy(dst->format, dst_height);
+ src_width0 = util_format_get_nblocksx(src->format, src_width0);
+ src_height0 = util_format_get_nblocksy(src->format, src_height0);
+
+ dstx = util_format_get_nblocksx(dst->format, dstx);
+ dsty = util_format_get_nblocksy(dst->format, dsty);
+
+ sbox.x = util_format_get_nblocksx(src->format, src_box->x);
+ sbox.y = util_format_get_nblocksy(src->format, src_box->y);
sbox.z = src_box->z;
- sbox.width = util_format_get_nblocksx(orig_info[0].format, src_box->width);
- sbox.height = util_format_get_nblocksy(orig_info[0].format, src_box->height);
+ sbox.width = util_format_get_nblocksx(src->format, src_box->width);
+ sbox.height = util_format_get_nblocksy(src->format, src_box->height);
sbox.depth = src_box->depth;
src_box = &sbox;
- si_compressed_to_blittable(dst, dst_level, &orig_info[1]);
- restore_orig[1] = TRUE;
- /* translate the dst box as well */
- dstx = util_format_get_nblocksx(orig_info[1].format, dstx);
- dsty = util_format_get_nblocksy(orig_info[1].format, dsty);
- } else if (!util_blitter_is_copy_supported(sctx->blitter, dst, src)) {
+ src_force_level = src_level;
+ } else if (!util_blitter_is_copy_supported(sctx->blitter, dst, src) ||
+ /* also *8_SNORM has precision issues, use UNORM instead */
+ util_format_is_snorm(src->format)) {
if (util_format_is_subsampled_422(src->format)) {
- /* XXX untested */
- si_change_format(src, src_level, &orig_info[0],
- PIPE_FORMAT_R8G8B8A8_UINT);
- si_change_format(dst, dst_level, &orig_info[1],
- PIPE_FORMAT_R8G8B8A8_UINT);
+ src_templ.format = PIPE_FORMAT_R8G8B8A8_UINT;
+ dst_templ.format = PIPE_FORMAT_R8G8B8A8_UINT;
+
+ dst_width = util_format_get_nblocksx(dst->format, dst_width);
+ src_width0 = util_format_get_nblocksx(src->format, src_width0);
+
+ dstx = util_format_get_nblocksx(dst->format, dstx);
sbox = *src_box;
- sbox.x = util_format_get_nblocksx(orig_info[0].format, src_box->x);
- sbox.width = util_format_get_nblocksx(orig_info[0].format, src_box->width);
+ sbox.x = util_format_get_nblocksx(src->format, src_box->x);
+ sbox.width = util_format_get_nblocksx(src->format, src_box->width);
src_box = &sbox;
- dstx = util_format_get_nblocksx(orig_info[1].format, dstx);
-
- restore_orig[0] = TRUE;
- restore_orig[1] = TRUE;
} else {
unsigned blocksize = util_format_get_blocksize(src->format);
switch (blocksize) {
case 1:
- si_change_format(src, src_level, &orig_info[0],
- PIPE_FORMAT_R8_UNORM);
- si_change_format(dst, dst_level, &orig_info[1],
- PIPE_FORMAT_R8_UNORM);
+ dst_templ.format = PIPE_FORMAT_R8_UNORM;
+ src_templ.format = PIPE_FORMAT_R8_UNORM;
break;
case 2:
- si_change_format(src, src_level, &orig_info[0],
- PIPE_FORMAT_R8G8_UNORM);
- si_change_format(dst, dst_level, &orig_info[1],
- PIPE_FORMAT_R8G8_UNORM);
+ dst_templ.format = PIPE_FORMAT_R8G8_UNORM;
+ src_templ.format = PIPE_FORMAT_R8G8_UNORM;
break;
case 4:
- si_change_format(src, src_level, &orig_info[0],
- PIPE_FORMAT_R8G8B8A8_UNORM);
- si_change_format(dst, dst_level, &orig_info[1],
- PIPE_FORMAT_R8G8B8A8_UNORM);
+ dst_templ.format = PIPE_FORMAT_R8G8B8A8_UNORM;
+ src_templ.format = PIPE_FORMAT_R8G8B8A8_UNORM;
break;
case 8:
- si_change_format(src, src_level, &orig_info[0],
- PIPE_FORMAT_R16G16B16A16_UINT);
- si_change_format(dst, dst_level, &orig_info[1],
- PIPE_FORMAT_R16G16B16A16_UINT);
+ dst_templ.format = PIPE_FORMAT_R16G16B16A16_UINT;
+ src_templ.format = PIPE_FORMAT_R16G16B16A16_UINT;
break;
case 16:
- si_change_format(src, src_level, &orig_info[0],
- PIPE_FORMAT_R32G32B32A32_UINT);
- si_change_format(dst, dst_level, &orig_info[1],
- PIPE_FORMAT_R32G32B32A32_UINT);
+ dst_templ.format = PIPE_FORMAT_R32G32B32A32_UINT;
+ src_templ.format = PIPE_FORMAT_R32G32B32A32_UINT;
break;
default:
fprintf(stderr, "Unhandled format %s with blocksize %u\n",
util_format_short_name(src->format), blocksize);
assert(0);
}
- restore_orig[0] = TRUE;
- restore_orig[1] = TRUE;
}
}
/* Initialize the surface. */
- util_blitter_default_dst_texture(&dst_templ, dst, dst_level, dstz);
dst_view = r600_create_surface_custom(ctx, dst, &dst_templ,
- rdst->surface.level[dst_level].npix_x,
- rdst->surface.level[dst_level].npix_y);
+ dst_width, dst_height);
/* Initialize the sampler view. */
- util_blitter_default_src_texture(&src_templ, src, src_level);
- src_view = ctx->create_sampler_view(ctx, src, &src_templ);
+ src_view = si_create_sampler_view_custom(ctx, src, &src_templ,
+ src_width0, src_height0,
+ src_force_level);
u_box_3d(dstx, dsty, dstz, abs(src_box->width), abs(src_box->height),
abs(src_box->depth), &dstbox);
@@ -662,18 +585,12 @@ void si_resource_copy_region(struct pipe_context *ctx,
/* Copy. */
si_blitter_begin(ctx, SI_COPY);
util_blitter_blit_generic(sctx->blitter, dst_view, &dstbox,
- src_view, src_box, src->width0, src->height0,
+ src_view, src_box, src_width0, src_height0,
PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL);
si_blitter_end(ctx);
pipe_surface_reference(&dst_view, NULL);
pipe_sampler_view_reference(&src_view, NULL);
-
- if (restore_orig[0])
- si_reset_blittable_to_orig(src, src_level, &orig_info[0]);
-
- if (restore_orig[1])
- si_reset_blittable_to_orig(dst, dst_level, &orig_info[1]);
}
/* For MSAA integer resolving to work, we change the format to NORM using this function. */
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 89bef2e7afd..d4fe5653687 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -137,14 +137,14 @@ static void *si_create_compute_state(
}
#else
- radeon_elf_read(code, header->num_bytes, &program->shader.binary, true);
+ radeon_elf_read(code, header->num_bytes, &program->shader.binary);
/* init_scratch_buffer patches the shader code with the scratch address,
* so we need to call it before si_shader_binary_read() which uploads
* the shader code to the GPU.
*/
init_scratch_buffer(sctx, program);
- si_shader_binary_read(sctx->screen, &program->shader, &program->shader.binary);
+ si_shader_binary_read(sctx->screen, &program->shader);
#endif
program->input_buffer = si_resource_create_custom(sctx->b.b.screen,
@@ -309,8 +309,6 @@ static void si_launch_grid(
kernel_args[i]);
}
- sctx->b.ws->buffer_unmap(input_buffer->cs_buf);
-
kernel_args_va = input_buffer->gpu_address;
kernel_args_va += kernel_args_offset;
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
new file mode 100644
index 00000000000..f8a9da45a10
--- /dev/null
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -0,0 +1,265 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Marek Olšák <[email protected]>
+ */
+
+#include "si_pipe.h"
+#include "sid.h"
+#include "radeon/r600_cs.h"
+
+
+/* Set this if you want the 3D engine to wait until CP DMA is done.
+ * It should be set on the last CP DMA packet. */
+#define R600_CP_DMA_SYNC (1 << 0) /* R600+ */
+
+/* Set this if the source data was used as a destination in a previous CP DMA
+ * packet. It's for preventing a read-after-write (RAW) hazard between two
+ * CP DMA packets. */
+#define SI_CP_DMA_RAW_WAIT (1 << 1) /* SI+ */
+#define CIK_CP_DMA_USE_L2 (1 << 2)
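Taken together, the intended use of these flags (mirroring the clear and copy loops later in this file) is a chunked loop that waits for older CP DMA work once up front and synchronizes the 3D engine only on the final packet. A minimal sketch, where emit_packet stands in for the emit helpers below and first_packet, tc_l2_flag, va and size are assumed context:

    while (size) {
            unsigned count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
            unsigned flags = tc_l2_flag;

            if (first_packet)      /* wait for older CP DMA packets once */
                    flags |= SI_CP_DMA_RAW_WAIT;
            if (size == count)     /* make the 3D engine wait on the last one */
                    flags |= R600_CP_DMA_SYNC;

            emit_packet(va, count, flags);
            va += count;
            size -= count;
    }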
+
+/* Emit a CP DMA packet to do a copy from one buffer to another.
+ * The size must fit in bits [20:0].
+ */
+static void si_emit_cp_dma_copy_buffer(struct si_context *sctx,
+ uint64_t dst_va, uint64_t src_va,
+ unsigned size, unsigned flags)
+{
+ struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? PKT3_CP_DMA_CP_SYNC : 0;
+ uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? PKT3_CP_DMA_CMD_RAW_WAIT : 0;
+ uint32_t sel = flags & CIK_CP_DMA_USE_L2 ?
+ PKT3_CP_DMA_SRC_SEL(3) | PKT3_CP_DMA_DST_SEL(3) : 0;
+
+ assert(size);
+ assert((size & ((1<<21)-1)) == size);
+
+ if (sctx->b.chip_class >= CIK) {
+ radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
+ radeon_emit(cs, sync_flag | sel); /* CP_SYNC [31] */
+ radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */
+ radeon_emit(cs, src_va >> 32); /* SRC_ADDR_HI [31:0] */
+ radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
+ radeon_emit(cs, dst_va >> 32); /* DST_ADDR_HI [31:0] */
+ radeon_emit(cs, size | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
+ } else {
+ radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
+ radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */
+ radeon_emit(cs, sync_flag | ((src_va >> 32) & 0xffff)); /* CP_SYNC [31] | SRC_ADDR_HI [15:0] */
+ radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
+ radeon_emit(cs, (dst_va >> 32) & 0xffff); /* DST_ADDR_HI [15:0] */
+ radeon_emit(cs, size | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
+ }
+}
+
+/* Emit a CP DMA packet to clear a buffer. The size must fit in bits [20:0]. */
+static void si_emit_cp_dma_clear_buffer(struct si_context *sctx,
+ uint64_t dst_va, unsigned size,
+ uint32_t clear_value, unsigned flags)
+{
+ struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? PKT3_CP_DMA_CP_SYNC : 0;
+ uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? PKT3_CP_DMA_CMD_RAW_WAIT : 0;
+ uint32_t dst_sel = flags & CIK_CP_DMA_USE_L2 ? PKT3_CP_DMA_DST_SEL(3) : 0;
+
+ assert(size);
+ assert((size & ((1<<21)-1)) == size);
+
+ if (sctx->b.chip_class >= CIK) {
+ radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
+ radeon_emit(cs, sync_flag | dst_sel | PKT3_CP_DMA_SRC_SEL(2)); /* CP_SYNC [31] | SRC_SEL[30:29] */
+ radeon_emit(cs, clear_value); /* DATA [31:0] */
+ radeon_emit(cs, 0); /* SRC_ADDR_HI, unused when SRC_SEL = data */
+ radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
+ radeon_emit(cs, dst_va >> 32); /* DST_ADDR_HI [31:0] */
+ radeon_emit(cs, size | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
+ } else {
+ radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
+ radeon_emit(cs, clear_value); /* DATA [31:0] */
+ radeon_emit(cs, sync_flag | PKT3_CP_DMA_SRC_SEL(2)); /* CP_SYNC [31] | SRC_SEL[30:29] */
+ radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
+ radeon_emit(cs, (dst_va >> 32) & 0xffff); /* DST_ADDR_HI [15:0] */
+ radeon_emit(cs, size | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
+ }
+}
+
+/* The max number of bytes to copy per packet. BYTE_COUNT occupies bits
+ * [20:0]; the limit is rounded down so each chunk stays 8-byte aligned. */
+#define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - 8)
+
+static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
+ unsigned offset, unsigned size, unsigned value,
+ bool is_framebuffer)
+{
+ struct si_context *sctx = (struct si_context*)ctx;
+ unsigned flush_flags, tc_l2_flag;
+
+ if (!size)
+ return;
+
+ /* Mark the buffer range of destination as valid (initialized),
+ * so that transfer_map knows it should wait for the GPU when mapping
+ * that range. */
+ util_range_add(&r600_resource(dst)->valid_buffer_range, offset,
+ offset + size);
+
+ /* Fallback for unaligned clears: write the pattern bytewise through
+ * a CPU mapping, honoring the byte offset within the buffer. */
+ if (offset % 4 != 0 || size % 4 != 0) {
+ uint8_t *map = sctx->b.ws->buffer_map(r600_resource(dst)->cs_buf,
+ sctx->b.rings.gfx.cs,
+ PIPE_TRANSFER_WRITE);
+ map += offset;
+ for (unsigned i = 0; i < size; i++)
+ map[i] = (value >> ((offset + i) % 4 * 8)) & 0xff;
+ return;
+ }
+
+ uint64_t va = r600_resource(dst)->gpu_address + offset;
+
+ /* Flush the caches where the resource is bound. */
+ if (is_framebuffer) {
+ flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
+ tc_l2_flag = 0;
+ } else {
+ flush_flags = SI_CONTEXT_INV_TC_L1 |
+ (sctx->b.chip_class == SI ? SI_CONTEXT_INV_TC_L2 : 0) |
+ SI_CONTEXT_INV_KCACHE;
+ tc_l2_flag = sctx->b.chip_class == SI ? 0 : CIK_CP_DMA_USE_L2;
+ }
+
+ sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
+ flush_flags;
+
+ while (size) {
+ unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
+ unsigned dma_flags = tc_l2_flag;
+
+ si_need_cs_space(sctx, 7 + (sctx->b.flags ? sctx->cache_flush.num_dw : 0),
+ FALSE);
+
+ /* This must be done after need_cs_space. */
+ r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ (struct r600_resource*)dst, RADEON_USAGE_WRITE,
+ RADEON_PRIO_MIN);
+
+ /* Flush the caches for the first copy only.
+ * Also wait for the previous CP DMA operations. */
+ if (sctx->b.flags) {
+ si_emit_cache_flush(&sctx->b, NULL);
+ dma_flags |= SI_CP_DMA_RAW_WAIT; /* same as WAIT_UNTIL=CP_DMA_IDLE */
+ }
+
+ /* Do the synchronization after the last copy, so that all data is written to memory. */
+ if (size == byte_count)
+ dma_flags |= R600_CP_DMA_SYNC;
+
+ /* Emit the clear packet. */
+ si_emit_cp_dma_clear_buffer(sctx, va, byte_count, value, dma_flags);
+
+ size -= byte_count;
+ va += byte_count;
+ }
+
+ /* Flush the caches again in case the 3D engine has been prefetching
+ * the resource. */
+ sctx->b.flags |= flush_flags;
+
+ if (tc_l2_flag)
+ r600_resource(dst)->TC_L2_dirty = true;
+}
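For context, state trackers reach si_clear_buffer through the r600_common_context hook installed at the bottom of this file. A hypothetical caller clearing 64 KiB of a buffer resource "buf" to zero would look like:

    /* Hypothetical caller; "buf" is any buffer resource. */
    sctx->b.clear_buffer(&sctx->b.b, buf, 0 /* offset */, 65536 /* size */,
                         0 /* value */, false /* is_framebuffer */);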
+
+void si_copy_buffer(struct si_context *sctx,
+ struct pipe_resource *dst, struct pipe_resource *src,
+ uint64_t dst_offset, uint64_t src_offset, unsigned size,
+ bool is_framebuffer)
+{
+ unsigned flush_flags, tc_l2_flag;
+
+ if (!size)
+ return;
+
+ /* Mark the buffer range of destination as valid (initialized),
+ * so that transfer_map knows it should wait for the GPU when mapping
+ * that range. */
+ util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset,
+ dst_offset + size);
+
+ dst_offset += r600_resource(dst)->gpu_address;
+ src_offset += r600_resource(src)->gpu_address;
+
+ /* Flush the caches where the resource is bound. */
+ if (is_framebuffer) {
+ flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
+ tc_l2_flag = 0;
+ } else {
+ flush_flags = SI_CONTEXT_INV_TC_L1 |
+ (sctx->b.chip_class == SI ? SI_CONTEXT_INV_TC_L2 : 0) |
+ SI_CONTEXT_INV_KCACHE;
+ tc_l2_flag = sctx->b.chip_class == SI ? 0 : CIK_CP_DMA_USE_L2;
+ }
+
+ sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
+ flush_flags;
+
+ while (size) {
+ unsigned sync_flags = tc_l2_flag;
+ unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
+
+ si_need_cs_space(sctx, 7 + (sctx->b.flags ? sctx->cache_flush.num_dw : 0), FALSE);
+
+ /* Flush the caches for the first copy only. Also wait for old CP DMA packets to complete. */
+ if (sctx->b.flags) {
+ si_emit_cache_flush(&sctx->b, NULL);
+ sync_flags |= SI_CP_DMA_RAW_WAIT;
+ }
+
+ /* Do the synchronization after the last copy, so that all data is written to memory. */
+ if (size == byte_count) {
+ sync_flags |= R600_CP_DMA_SYNC;
+ }
+
+ /* This must be done after si_need_cs_space. */
+ r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)src,
+ RADEON_USAGE_READ, RADEON_PRIO_MIN);
+ r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)dst,
+ RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
+
+ si_emit_cp_dma_copy_buffer(sctx, dst_offset, src_offset, byte_count, sync_flags);
+
+ size -= byte_count;
+ src_offset += byte_count;
+ dst_offset += byte_count;
+ }
+
+ /* Flush the caches again in case the 3D engine has been prefetching
+ * the resource. */
+ sctx->b.flags |= flush_flags;
+
+ if (tc_l2_flag)
+ r600_resource(dst)->TC_L2_dirty = true;
+}
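si_copy_buffer is called directly by the driver rather than through a pipe hook. A hypothetical whole-range copy, assuming dst and src are plain buffer resources:

    /* Hypothetical: copy the first "size" bytes between two buffers. */
    si_copy_buffer(sctx, dst, src, 0 /* dst_offset */, 0 /* src_offset */,
                   size, false /* is_framebuffer */);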
+
+void si_init_cp_dma_functions(struct si_context *sctx)
+{
+ sctx->b.clear_buffer = si_clear_buffer;
+}
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index bbfd36dcbeb..890be071596 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -24,14 +24,23 @@
* Marek Olšák <[email protected]>
*/
-/* Resource binding slots and sampler states (each described with 8 or 4 dwords)
- * live in memory on SI.
+/* Resource binding slots and sampler states (each described with 8 or
+ * 4 dwords) are stored in lists in memory which is accessed by shaders
+ * using scalar load instructions.
*
- * This file is responsible for managing lists of resources and sampler states
- * in memory and binding them, which means updating those structures in memory.
+ * This file is responsible for managing such lists. It keeps a copy of all
+ * descriptors in CPU memory and re-uploads a whole list if some slots have
+ * been changed.
*
- * There is also code for updating shader pointers to resources and sampler
- * states. CP DMA functions are here too.
+ * This code is also responsible for updating shader pointers to those lists.
+ *
+ * Note that CP DMA can't be used for updating the lists, because a GPU hang
+ * could leave the list in a mid-IB state; the next IB would then read wrong
+ * descriptors, leaving the whole context unusable from that point on.
+ * (Register shadowing can't be used for the same reason.)
+ *
+ * Also, uploading descriptors to newly allocated memory doesn't require
+ * a KCACHE flush.
*/
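To make the new scheme easier to follow, here is a rough sketch of the si_descriptors fields this file now relies on; the names are taken from the code below, but the actual definition lives in the driver headers and may differ:

    struct si_descriptors {                  /* sketch, not the real definition */
            uint32_t *list;                  /* CPU copy of all descriptor slots */
            bool list_dirty;                 /* re-upload the list before the next draw */
            struct r600_resource *buffer;    /* current upload buffer */
            unsigned buffer_offset;          /* offset of the list in that buffer */
            bool pointer_dirty;              /* re-emit the user-data pointer */
            unsigned shader_userdata_offset; /* byte offset of the user-data SGPR pair */
            unsigned element_dw_size;        /* dwords per slot (4 or 8) */
            unsigned num_elements;
            uint64_t enabled_mask;
    };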
#include "radeon/r600_cs.h"
@@ -42,7 +51,6 @@
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
-#define SI_NUM_CONTEXTS 16
/* NULL image and buffer descriptor.
*
@@ -64,284 +72,62 @@ static uint32_t null_descriptor[8] = {
* descriptor */
};
-/* Set this if you want the 3D engine to wait until CP DMA is done.
- * It should be set on the last CP DMA packet. */
-#define R600_CP_DMA_SYNC (1 << 0) /* R600+ */
-
-/* Set this if the source data was used as a destination in a previous CP DMA
- * packet. It's for preventing a read-after-write (RAW) hazard between two
- * CP DMA packets. */
-#define SI_CP_DMA_RAW_WAIT (1 << 1) /* SI+ */
-#define CIK_CP_DMA_USE_L2 (1 << 2)
-
-/* Emit a CP DMA packet to do a copy from one buffer to another.
- * The size must fit in bits [20:0].
- */
-static void si_emit_cp_dma_copy_buffer(struct si_context *sctx,
- uint64_t dst_va, uint64_t src_va,
- unsigned size, unsigned flags)
-{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
- uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? PKT3_CP_DMA_CP_SYNC : 0;
- uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? PKT3_CP_DMA_CMD_RAW_WAIT : 0;
- uint32_t sel = flags & CIK_CP_DMA_USE_L2 ?
- PKT3_CP_DMA_SRC_SEL(3) | PKT3_CP_DMA_DST_SEL(3) : 0;
-
- assert(size);
- assert((size & ((1<<21)-1)) == size);
-
- if (sctx->b.chip_class >= CIK) {
- radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
- radeon_emit(cs, sync_flag | sel); /* CP_SYNC [31] */
- radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */
- radeon_emit(cs, src_va >> 32); /* SRC_ADDR_HI [31:0] */
- radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
- radeon_emit(cs, dst_va >> 32); /* DST_ADDR_HI [31:0] */
- radeon_emit(cs, size | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
- } else {
- radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
- radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */
- radeon_emit(cs, sync_flag | ((src_va >> 32) & 0xffff)); /* CP_SYNC [31] | SRC_ADDR_HI [15:0] */
- radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
- radeon_emit(cs, (dst_va >> 32) & 0xffff); /* DST_ADDR_HI [15:0] */
- radeon_emit(cs, size | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
- }
-}
-
-/* Emit a CP DMA packet to clear a buffer. The size must fit in bits [20:0]. */
-static void si_emit_cp_dma_clear_buffer(struct si_context *sctx,
- uint64_t dst_va, unsigned size,
- uint32_t clear_value, unsigned flags)
-{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
- uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? PKT3_CP_DMA_CP_SYNC : 0;
- uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? PKT3_CP_DMA_CMD_RAW_WAIT : 0;
- uint32_t dst_sel = flags & CIK_CP_DMA_USE_L2 ? PKT3_CP_DMA_DST_SEL(3) : 0;
-
- assert(size);
- assert((size & ((1<<21)-1)) == size);
-
- if (sctx->b.chip_class >= CIK) {
- radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
- radeon_emit(cs, sync_flag | dst_sel | PKT3_CP_DMA_SRC_SEL(2)); /* CP_SYNC [31] | SRC_SEL[30:29] */
- radeon_emit(cs, clear_value); /* DATA [31:0] */
- radeon_emit(cs, 0);
- radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
- radeon_emit(cs, dst_va >> 32); /* DST_ADDR_HI [15:0] */
- radeon_emit(cs, size | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
- } else {
- radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
- radeon_emit(cs, clear_value); /* DATA [31:0] */
- radeon_emit(cs, sync_flag | PKT3_CP_DMA_SRC_SEL(2)); /* CP_SYNC [31] | SRC_SEL[30:29] */
- radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
- radeon_emit(cs, (dst_va >> 32) & 0xffff); /* DST_ADDR_HI [15:0] */
- radeon_emit(cs, size | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
- }
-}
-
-static void si_init_descriptors(struct si_context *sctx,
- struct si_descriptors *desc,
- unsigned shader_userdata_reg,
+static void si_init_descriptors(struct si_descriptors *desc,
+ unsigned shader_userdata_index,
unsigned element_dw_size,
- unsigned num_elements,
- void (*emit_func)(struct si_context *ctx, struct r600_atom *state))
+ unsigned num_elements)
{
+ int i;
+
assert(num_elements <= sizeof(desc->enabled_mask)*8);
- assert(num_elements <= sizeof(desc->dirty_mask)*8);
- desc->atom.emit = (void*)emit_func;
- desc->shader_userdata_reg = shader_userdata_reg;
+ desc->list = CALLOC(num_elements, element_dw_size * 4);
desc->element_dw_size = element_dw_size;
desc->num_elements = num_elements;
- desc->context_size = num_elements * element_dw_size * 4;
-
- desc->buffer = (struct r600_resource*)
- pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
- PIPE_USAGE_DEFAULT,
- SI_NUM_CONTEXTS * desc->context_size);
-
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, desc->buffer,
- RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
-
- /* We don't check for CS space here, because this should be called
- * only once at context initialization. */
- si_emit_cp_dma_clear_buffer(sctx, desc->buffer->gpu_address,
- desc->buffer->b.b.width0, 0,
- R600_CP_DMA_SYNC | CIK_CP_DMA_USE_L2);
+ desc->list_dirty = true; /* upload the list before the next draw */
+ desc->shader_userdata_offset = shader_userdata_index * 4;
+
+ /* Initialize the array to NULL descriptors if the element size is 8. */
+ if (element_dw_size == 8)
+ for (i = 0; i < num_elements; i++)
+ memcpy(desc->list + i*element_dw_size, null_descriptor,
+ sizeof(null_descriptor));
}
static void si_release_descriptors(struct si_descriptors *desc)
{
pipe_resource_reference((struct pipe_resource**)&desc->buffer, NULL);
+ FREE(desc->list);
}
-static void si_update_descriptors(struct si_context *sctx,
+static bool si_upload_descriptors(struct si_context *sctx,
struct si_descriptors *desc)
{
- if (desc->dirty_mask) {
- desc->atom.num_dw =
- 7 + /* copy */
- (4 + desc->element_dw_size) * util_bitcount64(desc->dirty_mask) + /* update */
- 4; /* pointer update */
-
- if (desc->shader_userdata_reg >= R_00B130_SPI_SHADER_USER_DATA_VS_0 &&
- desc->shader_userdata_reg < R_00B230_SPI_SHADER_USER_DATA_GS_0)
- desc->atom.num_dw += 4; /* second pointer update */
-
- desc->atom.dirty = true;
-
- /* TODO: Investigate if these flushes can be removed after
- * adding CE support. */
-
- /* The descriptors are read with the K cache. */
- sctx->b.flags |= SI_CONTEXT_INV_KCACHE;
-
- /* Since SI uses uncached CP DMA to update descriptors,
- * we have to flush TC L2, which is used to fetch constants
- * along with KCACHE. */
- if (sctx->b.chip_class == SI)
- sctx->b.flags |= SI_CONTEXT_INV_TC_L2;
- } else {
- desc->atom.dirty = false;
- }
-}
+ unsigned list_size = desc->num_elements * desc->element_dw_size * 4;
+ void *ptr;
-static void si_emit_shader_pointer(struct si_context *sctx,
- struct r600_atom *atom)
-{
- struct si_descriptors *desc = (struct si_descriptors*)atom;
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
- uint64_t va = desc->buffer->gpu_address +
- desc->current_context_id * desc->context_size +
- desc->buffer_offset;
+ if (!desc->list_dirty)
+ return true;
- radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0));
- radeon_emit(cs, (desc->shader_userdata_reg - SI_SH_REG_OFFSET) >> 2);
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
+ u_upload_alloc(sctx->b.uploader, 0, list_size,
+ &desc->buffer_offset,
+ (struct pipe_resource**)&desc->buffer, &ptr);
+ if (!desc->buffer)
+ return false; /* skip the draw call */
- if (desc->shader_userdata_reg >= R_00B130_SPI_SHADER_USER_DATA_VS_0 &&
- desc->shader_userdata_reg < R_00B230_SPI_SHADER_USER_DATA_GS_0) {
- radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0));
- radeon_emit(cs, (desc->shader_userdata_reg +
- (R_00B330_SPI_SHADER_USER_DATA_ES_0 -
- R_00B130_SPI_SHADER_USER_DATA_VS_0) -
- SI_SH_REG_OFFSET) >> 2);
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- }
-}
+ util_memcpy_cpu_to_le32(ptr, desc->list, list_size);
-static void si_emit_descriptors(struct si_context *sctx,
- struct si_descriptors *desc,
- uint32_t **descriptors)
-{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
- uint64_t va_base;
- int packet_start = 0;
- int packet_size = 0;
- int last_index = desc->num_elements; /* point to a non-existing element */
- uint64_t dirty_mask = desc->dirty_mask;
- unsigned new_context_id = (desc->current_context_id + 1) % SI_NUM_CONTEXTS;
-
- assert(dirty_mask);
-
- va_base = desc->buffer->gpu_address;
-
- /* Copy the descriptors to a new context slot. */
- si_emit_cp_dma_copy_buffer(sctx,
- va_base + new_context_id * desc->context_size,
- va_base + desc->current_context_id * desc->context_size,
- desc->context_size, R600_CP_DMA_SYNC | CIK_CP_DMA_USE_L2);
-
- va_base += new_context_id * desc->context_size;
-
- /* Update the descriptors.
- * Updates of consecutive descriptors are merged to one WRITE_DATA packet.
- *
- * XXX When unbinding lots of resources, consider clearing the memory
- * with CP DMA instead of emitting zeros.
- */
- while (dirty_mask) {
- int i = u_bit_scan64(&dirty_mask);
-
- assert(i < desc->num_elements);
-
- if (last_index+1 == i && packet_size) {
- /* Append new data at the end of the last packet. */
- packet_size += desc->element_dw_size;
- cs->buf[packet_start] = PKT3(PKT3_WRITE_DATA, packet_size, 0);
- } else {
- /* Start a new packet. */
- uint64_t va = va_base + i * desc->element_dw_size * 4;
-
- packet_start = cs->cdw;
- packet_size = 2 + desc->element_dw_size;
-
- radeon_emit(cs, PKT3(PKT3_WRITE_DATA, packet_size, 0));
- radeon_emit(cs, PKT3_WRITE_DATA_DST_SEL(sctx->b.chip_class == SI ?
- PKT3_WRITE_DATA_DST_SEL_MEM_SYNC :
- PKT3_WRITE_DATA_DST_SEL_TC_L2) |
- PKT3_WRITE_DATA_WR_CONFIRM |
- PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME));
- radeon_emit(cs, va & 0xFFFFFFFFUL);
- radeon_emit(cs, (va >> 32UL) & 0xFFFFFFFFUL);
- }
-
- radeon_emit_array(cs, descriptors[i], desc->element_dw_size);
-
- last_index = i;
- }
-
- desc->dirty_mask = 0;
- desc->current_context_id = new_context_id;
-
- /* Now update the shader userdata pointer. */
- si_emit_shader_pointer(sctx, &desc->atom);
-}
+ r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, desc->buffer,
+ RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
-static unsigned si_get_shader_user_data_base(unsigned shader)
-{
- switch (shader) {
- case PIPE_SHADER_VERTEX:
- return R_00B130_SPI_SHADER_USER_DATA_VS_0;
- case PIPE_SHADER_GEOMETRY:
- return R_00B230_SPI_SHADER_USER_DATA_GS_0;
- case PIPE_SHADER_FRAGMENT:
- return R_00B030_SPI_SHADER_USER_DATA_PS_0;
- default:
- assert(0);
- return 0;
- }
+ desc->list_dirty = false;
+ desc->pointer_dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
+ return true;
}
/* SAMPLER VIEWS */
-static void si_emit_sampler_views(struct si_context *sctx, struct r600_atom *atom)
-{
- struct si_sampler_views *views = (struct si_sampler_views*)atom;
-
- si_emit_descriptors(sctx, &views->desc, views->desc_data);
-}
-
-static void si_init_sampler_views(struct si_context *sctx,
- struct si_sampler_views *views,
- unsigned shader)
-{
- int i;
-
- si_init_descriptors(sctx, &views->desc,
- si_get_shader_user_data_base(shader) +
- SI_SGPR_RESOURCE * 4,
- 8, SI_NUM_SAMPLER_VIEWS, si_emit_sampler_views);
-
- for (i = 0; i < views->desc.num_elements; i++) {
- views->desc_data[i] = null_descriptor;
- views->desc.dirty_mask |= 1llu << i;
- }
- si_update_descriptors(sctx, &views->desc);
-}
-
static void si_release_sampler_views(struct si_sampler_views *views)
{
int i;
@@ -382,10 +168,10 @@ static void si_sampler_views_begin_new_cs(struct si_context *sctx,
si_get_resource_ro_priority(rview->resource));
}
+ if (!views->desc.buffer)
+ return;
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, views->desc.buffer,
RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
-
- si_emit_shader_pointer(sctx, &views->desc.atom);
}
static void si_set_sampler_view(struct si_context *sctx, unsigned shader,
@@ -406,17 +192,16 @@ static void si_set_sampler_view(struct si_context *sctx, unsigned shader,
rview->resource, RADEON_USAGE_READ,
si_get_resource_ro_priority(rview->resource));
-
pipe_sampler_view_reference(&views->views[slot], view);
- views->desc_data[slot] = view_desc;
+ memcpy(views->desc.list + slot*8, view_desc, 8*4);
views->desc.enabled_mask |= 1llu << slot;
} else {
pipe_sampler_view_reference(&views->views[slot], NULL);
- views->desc_data[slot] = null_descriptor;
+ memcpy(views->desc.list + slot*8, null_descriptor, 8*4);
views->desc.enabled_mask &= ~(1llu << slot);
}
- views->desc.dirty_mask |= 1llu << slot;
+ views->desc.list_dirty = true;
}
static void si_set_sampler_views(struct pipe_context *ctx,
@@ -475,25 +260,17 @@ static void si_set_sampler_views(struct pipe_context *ctx,
NULL, NULL);
}
}
-
- si_update_descriptors(sctx, &samplers->views.desc);
}
/* SAMPLER STATES */
-static void si_emit_sampler_states(struct si_context *sctx, struct r600_atom *atom)
-{
- struct si_sampler_states *states = (struct si_sampler_states*)atom;
-
- si_emit_descriptors(sctx, &states->desc, states->desc_data);
-}
-
static void si_sampler_states_begin_new_cs(struct si_context *sctx,
struct si_sampler_states *states)
{
+ if (!states->desc.buffer)
+ return;
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, states->desc.buffer,
RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
- si_emit_shader_pointer(sctx, &states->desc.atom);
}
void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader,
@@ -513,66 +290,39 @@ void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader,
for (i = 0; i < count; i++) {
unsigned slot = start + i;
- if (!sstates[i]) {
- samplers->desc.dirty_mask &= ~(1llu << slot);
+ if (!sstates[i])
continue;
- }
- samplers->desc_data[slot] = sstates[i]->val;
- samplers->desc.dirty_mask |= 1llu << slot;
+ memcpy(samplers->desc.list + slot*4, sstates[i]->val, 4*4);
+ samplers->desc.list_dirty = true;
}
-
- si_update_descriptors(sctx, &samplers->desc);
}
/* BUFFER RESOURCES */
-static void si_emit_buffer_resources(struct si_context *sctx, struct r600_atom *atom)
-{
- struct si_buffer_resources *buffers = (struct si_buffer_resources*)atom;
-
- si_emit_descriptors(sctx, &buffers->desc, buffers->desc_data);
-}
-
-static void si_init_buffer_resources(struct si_context *sctx,
- struct si_buffer_resources *buffers,
- unsigned num_buffers, unsigned shader,
+static void si_init_buffer_resources(struct si_buffer_resources *buffers,
+ unsigned num_buffers,
unsigned shader_userdata_index,
enum radeon_bo_usage shader_usage,
enum radeon_bo_priority priority)
{
- int i;
-
- buffers->num_buffers = num_buffers;
buffers->shader_usage = shader_usage;
buffers->priority = priority;
buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*));
- buffers->desc_storage = CALLOC(num_buffers, sizeof(uint32_t) * 4);
-
- /* si_emit_descriptors only accepts an array of arrays.
- * This adds such an array. */
- buffers->desc_data = CALLOC(num_buffers, sizeof(uint32_t*));
- for (i = 0; i < num_buffers; i++) {
- buffers->desc_data[i] = &buffers->desc_storage[i*4];
- }
- si_init_descriptors(sctx, &buffers->desc,
- si_get_shader_user_data_base(shader) +
- shader_userdata_index*4, 4, num_buffers,
- si_emit_buffer_resources);
+ si_init_descriptors(&buffers->desc, shader_userdata_index, 4,
+ num_buffers);
}
static void si_release_buffer_resources(struct si_buffer_resources *buffers)
{
int i;
- for (i = 0; i < buffers->num_buffers; i++) {
+ for (i = 0; i < buffers->desc.num_elements; i++) {
pipe_resource_reference(&buffers->buffers[i], NULL);
}
FREE(buffers->buffers);
- FREE(buffers->desc_storage);
- FREE(buffers->desc_data);
si_release_descriptors(&buffers->desc);
}
@@ -590,11 +340,11 @@ static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
buffers->shader_usage, buffers->priority);
}
+ if (!buffers->desc.buffer)
+ return;
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
buffers->desc.buffer, RADEON_USAGE_READWRITE,
RADEON_PRIO_SHADER_DATA);
-
- si_emit_shader_pointer(sctx, &buffers->desc.atom);
}
/* VERTEX BUFFERS */
@@ -617,14 +367,15 @@ static void si_vertex_buffers_begin_new_cs(struct si_context *sctx)
(struct r600_resource*)sctx->vertex_buffer[vb].buffer,
RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
}
+
+ if (!desc->buffer)
+ return;
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
desc->buffer, RADEON_USAGE_READ,
RADEON_PRIO_SHADER_DATA);
-
- si_emit_shader_pointer(sctx, &desc->atom);
}
-void si_update_vertex_buffers(struct si_context *sctx)
+static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
{
struct si_descriptors *desc = &sctx->vertex_buffers;
bool bound[SI_NUM_VERTEX_BUFFERS] = {};
@@ -632,8 +383,10 @@ void si_update_vertex_buffers(struct si_context *sctx)
uint64_t va;
uint32_t *ptr;
+ if (!sctx->vertex_buffers_dirty)
+ return true;
if (!count || !sctx->vertex_elements)
- return;
+ return true;
/* Vertex buffer descriptors are the only ones which are uploaded
* directly through a staging buffer and don't go through
@@ -641,13 +394,14 @@ void si_update_vertex_buffers(struct si_context *sctx)
*/
u_upload_alloc(sctx->b.uploader, 0, count * 16, &desc->buffer_offset,
(struct pipe_resource**)&desc->buffer, (void**)&ptr);
+ if (!desc->buffer)
+ return false;
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
desc->buffer, RADEON_USAGE_READ,
RADEON_PRIO_SHADER_DATA);
assert(count <= SI_NUM_VERTEX_BUFFERS);
- assert(desc->current_context_id == 0);
for (i = 0; i < count; i++) {
struct pipe_vertex_element *ve = &sctx->vertex_elements->elements[i];
@@ -675,7 +429,8 @@ void si_update_vertex_buffers(struct si_context *sctx)
desc[0] = va & 0xFFFFFFFF;
desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
S_008F04_STRIDE(vb->stride);
- if (vb->stride)
+
+ if (sctx->b.chip_class <= CIK && vb->stride)
/* Round up by rounding down and adding 1 */
desc[2] = (vb->buffer->width0 - offset -
sctx->vertex_elements->format_size[i]) /
@@ -693,13 +448,14 @@ void si_update_vertex_buffers(struct si_context *sctx)
}
}
- desc->atom.num_dw = 8; /* update 2 shader pointers (VS+ES) */
- desc->atom.dirty = true;
-
/* Don't flush the const cache. It would have a very negative effect
* on performance (confirmed by testing). New descriptors are always
* uploaded to a fresh new buffer, so I don't think flushing the const
* cache is needed. */
+ desc->pointer_dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
+ sctx->vertex_buffers_dirty = false;
+ return true;
}
@@ -724,7 +480,7 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s
if (shader >= SI_NUM_SHADERS)
return;
- assert(slot < buffers->num_buffers);
+ assert(slot < buffers->desc.num_elements);
pipe_resource_reference(&buffers->buffers[slot], NULL);
/* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
@@ -751,7 +507,7 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s
}
/* Set the descriptor. */
- uint32_t *desc = buffers->desc_data[slot];
+ uint32_t *desc = buffers->desc.list + slot*4;
desc[0] = va;
desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
S_008F04_STRIDE(0);
@@ -770,12 +526,11 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s
buffers->desc.enabled_mask |= 1llu << slot;
} else {
/* Clear the descriptor. */
- memset(buffers->desc_data[slot], 0, sizeof(uint32_t) * 4);
+ memset(buffers->desc.list + slot*4, 0, sizeof(uint32_t) * 4);
buffers->desc.enabled_mask &= ~(1llu << slot);
}
- buffers->desc.dirty_mask |= 1llu << slot;
- si_update_descriptors(sctx, &buffers->desc);
+ buffers->desc.list_dirty = true;
}
/* RING BUFFERS */
@@ -784,7 +539,7 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
struct pipe_resource *buffer,
unsigned stride, unsigned num_records,
bool add_tid, bool swizzle,
- unsigned element_size, unsigned index_stride)
+ unsigned element_size, unsigned index_stride, uint64_t offset)
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_buffer_resources *buffers = &sctx->rw_buffers[shader];
@@ -795,13 +550,13 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
/* The stride field in the resource descriptor has 14 bits */
assert(stride < (1 << 14));
- assert(slot < buffers->num_buffers);
+ assert(slot < buffers->desc.num_elements);
pipe_resource_reference(&buffers->buffers[slot], NULL);
if (buffer) {
uint64_t va;
- va = r600_resource(buffer)->gpu_address;
+ va = r600_resource(buffer)->gpu_address + offset;
switch (element_size) {
default:
@@ -839,8 +594,11 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
break;
}
+ /* On VI, NUM_RECORDS is counted in bytes (not elements) for
+ * swizzled buffers, so scale it by the stride. */
+ if (sctx->b.chip_class >= VI && stride)
+ num_records *= stride;
+
/* Set the descriptor. */
- uint32_t *desc = buffers->desc_data[slot];
+ uint32_t *desc = buffers->desc.list + slot*4;
desc[0] = va;
desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
S_008F04_STRIDE(stride) |
@@ -863,12 +621,11 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
buffers->desc.enabled_mask |= 1llu << slot;
} else {
/* Clear the descriptor. */
- memset(buffers->desc_data[slot], 0, sizeof(uint32_t) * 4);
+ memset(buffers->desc.list + slot*4, 0, sizeof(uint32_t) * 4);
buffers->desc.enabled_mask &= ~(1llu << slot);
}
- buffers->desc.dirty_mask |= 1llu << slot;
- si_update_descriptors(sctx, &buffers->desc);
+ buffers->desc.list_dirty = true;
}
/* STREAMOUT BUFFERS */
@@ -929,15 +686,21 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
struct pipe_resource *buffer = targets[i]->buffer;
uint64_t va = r600_resource(buffer)->gpu_address;
- /* Set the descriptor. */
- uint32_t *desc = buffers->desc_data[bufidx];
+ /* Set the descriptor.
+ *
+ * On VI, the format must be non-INVALID, otherwise
+ * the buffer will be considered not bound and store
+ * instructions will be no-ops.
+ */
+ uint32_t *desc = buffers->desc.list + bufidx*4;
desc[0] = va;
desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
desc[2] = 0xffffffff;
desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
/* Set the resource. */
pipe_resource_reference(&buffers->buffers[bufidx],
@@ -948,24 +711,22 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
buffers->desc.enabled_mask |= 1llu << bufidx;
} else {
/* Clear the descriptor and unset the resource. */
- memset(buffers->desc_data[bufidx], 0,
+ memset(buffers->desc.list + bufidx*4, 0,
sizeof(uint32_t) * 4);
pipe_resource_reference(&buffers->buffers[bufidx],
NULL);
buffers->desc.enabled_mask &= ~(1llu << bufidx);
}
- buffers->desc.dirty_mask |= 1llu << bufidx;
}
for (; i < old_num_targets; i++) {
bufidx = SI_SO_BUF_OFFSET + i;
/* Clear the descriptor and unset the resource. */
- memset(buffers->desc_data[bufidx], 0, sizeof(uint32_t) * 4);
+ memset(buffers->desc.list + bufidx*4, 0, sizeof(uint32_t) * 4);
pipe_resource_reference(&buffers->buffers[bufidx], NULL);
buffers->desc.enabled_mask &= ~(1llu << bufidx);
- buffers->desc.dirty_mask |= 1llu << bufidx;
}
- si_update_descriptors(sctx, &buffers->desc);
+ buffers->desc.list_dirty = true;
}
static void si_desc_reset_buffer_offset(struct pipe_context *ctx,
@@ -1034,22 +795,19 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
/* Read/Write buffers. */
for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
struct si_buffer_resources *buffers = &sctx->rw_buffers[shader];
- bool found = false;
uint64_t mask = buffers->desc.enabled_mask;
while (mask) {
i = u_bit_scan64(&mask);
if (buffers->buffers[i] == buf) {
- si_desc_reset_buffer_offset(ctx, buffers->desc_data[i],
+ si_desc_reset_buffer_offset(ctx, buffers->desc.list + i*4,
old_va, buf);
+ buffers->desc.list_dirty = true;
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
rbuffer, buffers->shader_usage,
buffers->priority);
- buffers->desc.dirty_mask |= 1llu << i;
- found = true;
-
if (i >= SI_SO_BUF_OFFSET && shader == PIPE_SHADER_VERTEX) {
/* Update the streamout state. */
if (sctx->b.streamout.begin_emitted) {
@@ -1061,34 +819,25 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
}
}
}
- if (found) {
- si_update_descriptors(sctx, &buffers->desc);
- }
}
/* Constant buffers. */
for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
struct si_buffer_resources *buffers = &sctx->const_buffers[shader];
- bool found = false;
uint64_t mask = buffers->desc.enabled_mask;
while (mask) {
unsigned i = u_bit_scan64(&mask);
if (buffers->buffers[i] == buf) {
- si_desc_reset_buffer_offset(ctx, buffers->desc_data[i],
+ si_desc_reset_buffer_offset(ctx, buffers->desc.list + i*4,
old_va, buf);
+ buffers->desc.list_dirty = true;
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
rbuffer, buffers->shader_usage,
buffers->priority);
-
- buffers->desc.dirty_mask |= 1llu << i;
- found = true;
}
}
- if (found) {
- si_update_descriptors(sctx, &buffers->desc);
- }
}
/* Texture buffers - update virtual addresses in sampler view descriptors. */
@@ -1100,223 +849,211 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
/* Texture buffers - update bindings. */
for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
struct si_sampler_views *views = &sctx->samplers[shader].views;
- bool found = false;
uint64_t mask = views->desc.enabled_mask;
while (mask) {
unsigned i = u_bit_scan64(&mask);
if (views->views[i]->texture == buf) {
+ si_desc_reset_buffer_offset(ctx, views->desc.list + i*8+4,
+ old_va, buf);
+ views->desc.list_dirty = true;
+
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
rbuffer, RADEON_USAGE_READ,
RADEON_PRIO_SHADER_BUFFER_RO);
-
- views->desc.dirty_mask |= 1llu << i;
- found = true;
}
}
- if (found) {
- si_update_descriptors(sctx, &views->desc);
- }
}
}
-/* CP DMA */
-
-/* The max number of bytes to copy per packet. */
-#define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - 8)
+/* SHADER USER DATA */
-static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
- unsigned offset, unsigned size, unsigned value,
- bool is_framebuffer)
+static void si_mark_shader_pointers_dirty(struct si_context *sctx,
+ unsigned shader)
{
- struct si_context *sctx = (struct si_context*)ctx;
- unsigned flush_flags, tc_l2_flag;
+ sctx->const_buffers[shader].desc.pointer_dirty = true;
+ sctx->rw_buffers[shader].desc.pointer_dirty = true;
+ sctx->samplers[shader].views.desc.pointer_dirty = true;
+ sctx->samplers[shader].states.desc.pointer_dirty = true;
- if (!size)
- return;
+ if (shader == PIPE_SHADER_VERTEX)
+ sctx->vertex_buffers.pointer_dirty = true;
- /* Mark the buffer range of destination as valid (initialized),
- * so that transfer_map knows it should wait for the GPU when mapping
- * that range. */
- util_range_add(&r600_resource(dst)->valid_buffer_range, offset,
- offset + size);
-
- /* Fallback for unaligned clears. */
- if (offset % 4 != 0 || size % 4 != 0) {
- uint32_t *map = sctx->b.ws->buffer_map(r600_resource(dst)->cs_buf,
- sctx->b.rings.gfx.cs,
- PIPE_TRANSFER_WRITE);
- size /= 4;
- for (unsigned i = 0; i < size; i++)
- *map++ = value;
- return;
- }
+ si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
+}
- uint64_t va = r600_resource(dst)->gpu_address + offset;
+static void si_shader_userdata_begin_new_cs(struct si_context *sctx)
+{
+ int i;
- /* Flush the caches where the resource is bound. */
- if (is_framebuffer) {
- flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
- tc_l2_flag = 0;
- } else {
- flush_flags = SI_CONTEXT_INV_TC_L1 |
- (sctx->b.chip_class == SI ? SI_CONTEXT_INV_TC_L2 : 0) |
- SI_CONTEXT_INV_KCACHE;
- tc_l2_flag = sctx->b.chip_class == SI ? 0 : CIK_CP_DMA_USE_L2;
+ for (i = 0; i < SI_NUM_SHADERS; i++) {
+ si_mark_shader_pointers_dirty(sctx, i);
}
+}
- sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
- flush_flags;
-
- while (size) {
- unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
- unsigned dma_flags = tc_l2_flag;
-
- si_need_cs_space(sctx, 7 + (sctx->b.flags ? sctx->cache_flush.num_dw : 0),
- FALSE);
-
- /* This must be done after need_cs_space. */
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
- (struct r600_resource*)dst, RADEON_USAGE_WRITE,
- RADEON_PRIO_MIN);
-
- /* Flush the caches for the first copy only.
- * Also wait for the previous CP DMA operations. */
- if (sctx->b.flags) {
- si_emit_cache_flush(&sctx->b, NULL);
- dma_flags |= SI_CP_DMA_RAW_WAIT; /* same as WAIT_UNTIL=CP_DMA_IDLE */
- }
-
- /* Do the synchronization after the last copy, so that all data is written to memory. */
- if (size == byte_count)
- dma_flags |= R600_CP_DMA_SYNC;
+/* Set a base register address for user data constants in the given shader.
+ * This assigns a mapping from PIPE_SHADER_* to SPI_SHADER_USER_DATA_*.
+ */
+static void si_set_user_data_base(struct si_context *sctx,
+ unsigned shader, uint32_t new_base)
+{
+ uint32_t *base = &sctx->shader_userdata.sh_base[shader];
- /* Emit the clear packet. */
- si_emit_cp_dma_clear_buffer(sctx, va, byte_count, value, dma_flags);
+ if (*base != new_base) {
+ *base = new_base;
- size -= byte_count;
- va += byte_count;
+ if (new_base)
+ si_mark_shader_pointers_dirty(sctx, shader);
}
-
- /* Flush the caches again in case the 3D engine has been prefetching
- * the resource. */
- sctx->b.flags |= flush_flags;
-
- if (tc_l2_flag)
- r600_resource(dst)->TC_L2_dirty = true;
}
-void si_copy_buffer(struct si_context *sctx,
- struct pipe_resource *dst, struct pipe_resource *src,
- uint64_t dst_offset, uint64_t src_offset, unsigned size,
- bool is_framebuffer)
+/* This must be called when these shaders are changed from non-NULL to NULL
+ * and vice versa:
+ * - geometry shader
+ * - tessellation control shader
+ * - tessellation evaluation shader
+ */
+void si_shader_change_notify(struct si_context *sctx)
{
- unsigned flush_flags, tc_l2_flag;
-
- if (!size)
- return;
-
- /* Mark the buffer range of destination as valid (initialized),
- * so that transfer_map knows it should wait for the GPU when mapping
- * that range. */
- util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset,
- dst_offset + size);
-
- dst_offset += r600_resource(dst)->gpu_address;
- src_offset += r600_resource(src)->gpu_address;
-
- /* Flush the caches where the resource is bound. */
- if (is_framebuffer) {
- flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
- tc_l2_flag = 0;
+ /* VS can be bound as VS, ES, or LS. */
+ if (sctx->tes_shader)
+ si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
+ R_00B530_SPI_SHADER_USER_DATA_LS_0);
+ else if (sctx->gs_shader)
+ si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
+ R_00B330_SPI_SHADER_USER_DATA_ES_0);
+ else
+ si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
+ R_00B130_SPI_SHADER_USER_DATA_VS_0);
+
+ /* TES can be bound as ES, VS, or not bound. */
+ if (sctx->tes_shader) {
+ if (sctx->gs_shader)
+ si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
+ R_00B330_SPI_SHADER_USER_DATA_ES_0);
+ else
+ si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
+ R_00B130_SPI_SHADER_USER_DATA_VS_0);
} else {
- flush_flags = SI_CONTEXT_INV_TC_L1 |
- (sctx->b.chip_class == SI ? SI_CONTEXT_INV_TC_L2 : 0) |
- SI_CONTEXT_INV_KCACHE;
- tc_l2_flag = sctx->b.chip_class == SI ? 0 : CIK_CP_DMA_USE_L2;
+ si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL, 0);
}
+}
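A hypothetical bind path illustrating when this must run, assuming a gs_shader member as used elsewhere in this file:

    /* Hypothetical: rebinding the geometry shader changes which hardware
     * stage the VS runs as, so the user-data bases must be re-derived. */
    sctx->gs_shader = new_gs_state;
    si_shader_change_notify(sctx);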
- sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
- flush_flags;
+static void si_emit_shader_pointer(struct si_context *sctx,
+ struct si_descriptors *desc,
+ unsigned sh_base, bool keep_dirty)
+{
+ struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ uint64_t va;
- while (size) {
- unsigned sync_flags = tc_l2_flag;
- unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
+ if (!desc->pointer_dirty || !desc->buffer)
+ return;
- si_need_cs_space(sctx, 7 + (sctx->b.flags ? sctx->cache_flush.num_dw : 0), FALSE);
+ va = desc->buffer->gpu_address +
+ desc->buffer_offset;
- /* Flush the caches for the first copy only. Also wait for old CP DMA packets to complete. */
- if (sctx->b.flags) {
- si_emit_cache_flush(&sctx->b, NULL);
- sync_flags |= SI_CP_DMA_RAW_WAIT;
- }
+ radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0));
+ radeon_emit(cs, (sh_base + desc->shader_userdata_offset - SI_SH_REG_OFFSET) >> 2);
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
- /* Do the synchronization after the last copy, so that all data is written to memory. */
- if (size == byte_count) {
- sync_flags |= R600_CP_DMA_SYNC;
- }
+ desc->pointer_dirty = keep_dirty;
+}
- /* This must be done after r600_need_cs_space. */
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)src,
- RADEON_USAGE_READ, RADEON_PRIO_MIN);
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)dst,
- RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
+static void si_emit_shader_userdata(struct si_context *sctx,
+ struct r600_atom *atom)
+{
+ unsigned i;
+ uint32_t *sh_base = sctx->shader_userdata.sh_base;
+
+ if (sctx->gs_shader) {
+ /* The VS copy shader needs these for clipping, streamout, and rings. */
+ unsigned vs_base = R_00B130_SPI_SHADER_USER_DATA_VS_0;
+ unsigned i = PIPE_SHADER_VERTEX;
+
+ si_emit_shader_pointer(sctx, &sctx->const_buffers[i].desc, vs_base, true);
+ si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc, vs_base, true);
+
+ /* The TESSEVAL shader needs this for the ESGS ring buffer. */
+ si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc,
+ R_00B330_SPI_SHADER_USER_DATA_ES_0, true);
+ } else if (sctx->tes_shader) {
+ /* The TESSEVAL shader needs this for streamout. */
+ si_emit_shader_pointer(sctx, &sctx->rw_buffers[PIPE_SHADER_VERTEX].desc,
+ R_00B130_SPI_SHADER_USER_DATA_VS_0, true);
+ }
- si_emit_cp_dma_copy_buffer(sctx, dst_offset, src_offset, byte_count, sync_flags);
+ for (i = 0; i < SI_NUM_SHADERS; i++) {
+ unsigned base = sh_base[i];
- size -= byte_count;
- src_offset += byte_count;
- dst_offset += byte_count;
- }
+ if (!base)
+ continue;
- /* Flush the caches again in case the 3D engine has been prefetching
- * the resource. */
- sctx->b.flags |= flush_flags;
+ if (i != PIPE_SHADER_TESS_EVAL)
+ si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc, base, false);
- if (tc_l2_flag)
- r600_resource(dst)->TC_L2_dirty = true;
+ si_emit_shader_pointer(sctx, &sctx->const_buffers[i].desc, base, false);
+ si_emit_shader_pointer(sctx, &sctx->samplers[i].views.desc, base, false);
+ si_emit_shader_pointer(sctx, &sctx->samplers[i].states.desc, base, false);
+ }
+ si_emit_shader_pointer(sctx, &sctx->vertex_buffers, sh_base[PIPE_SHADER_VERTEX], false);
}
-/* INIT/DEINIT */
+/* INIT/DEINIT/UPLOAD */
void si_init_all_descriptors(struct si_context *sctx)
{
int i;
for (i = 0; i < SI_NUM_SHADERS; i++) {
- si_init_buffer_resources(sctx, &sctx->const_buffers[i],
- SI_NUM_CONST_BUFFERS, i, SI_SGPR_CONST,
+ si_init_buffer_resources(&sctx->const_buffers[i],
+ SI_NUM_CONST_BUFFERS, SI_SGPR_CONST,
RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
- si_init_buffer_resources(sctx, &sctx->rw_buffers[i],
- i == PIPE_SHADER_VERTEX ?
- SI_NUM_RW_BUFFERS : SI_NUM_RING_BUFFERS,
- i, SI_SGPR_RW_BUFFERS,
+ si_init_buffer_resources(&sctx->rw_buffers[i],
+ SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS,
RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
- si_init_sampler_views(sctx, &sctx->samplers[i].views, i);
-
- si_init_descriptors(sctx, &sctx->samplers[i].states.desc,
- si_get_shader_user_data_base(i) + SI_SGPR_SAMPLER * 4,
- 4, SI_NUM_SAMPLER_STATES, si_emit_sampler_states);
-
- sctx->atoms.s.const_buffers[i] = &sctx->const_buffers[i].desc.atom;
- sctx->atoms.s.rw_buffers[i] = &sctx->rw_buffers[i].desc.atom;
- sctx->atoms.s.sampler_views[i] = &sctx->samplers[i].views.desc.atom;
- sctx->atoms.s.sampler_states[i] = &sctx->samplers[i].states.desc.atom;
+ si_init_descriptors(&sctx->samplers[i].views.desc,
+ SI_SGPR_RESOURCE, 8, SI_NUM_SAMPLER_VIEWS);
+ si_init_descriptors(&sctx->samplers[i].states.desc,
+ SI_SGPR_SAMPLER, 4, SI_NUM_SAMPLER_STATES);
}
- si_init_descriptors(sctx, &sctx->vertex_buffers,
- si_get_shader_user_data_base(PIPE_SHADER_VERTEX) +
- SI_SGPR_VERTEX_BUFFER*4, 4, SI_NUM_VERTEX_BUFFERS,
- si_emit_shader_pointer);
- sctx->atoms.s.vertex_buffers = &sctx->vertex_buffers.atom;
+ si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFER,
+ 4, SI_NUM_VERTEX_BUFFERS);
/* Set pipe_context functions. */
sctx->b.b.set_constant_buffer = si_set_constant_buffer;
sctx->b.b.set_sampler_views = si_set_sampler_views;
sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
- sctx->b.clear_buffer = si_clear_buffer;
sctx->b.invalidate_buffer = si_invalidate_buffer;
+
+ /* Shader user data. */
+ sctx->atoms.s.shader_userdata = &sctx->shader_userdata.atom;
+ sctx->shader_userdata.atom.emit = (void*)si_emit_shader_userdata;
+
+ /* Upper bound, 4 pointers per shader, +1 for vertex buffers, +2 for the VS copy shader. */
+ sctx->shader_userdata.atom.num_dw = (SI_NUM_SHADERS * 4 + 1 + 2) * 4;
+
+ /* Set default and immutable mappings. */
+ si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B130_SPI_SHADER_USER_DATA_VS_0);
+ si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL, R_00B430_SPI_SHADER_USER_DATA_HS_0);
+ si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY, R_00B230_SPI_SHADER_USER_DATA_GS_0);
+ si_set_user_data_base(sctx, PIPE_SHADER_FRAGMENT, R_00B030_SPI_SHADER_USER_DATA_PS_0);
+}
+
+bool si_upload_shader_descriptors(struct si_context *sctx)
+{
+ int i;
+
+ for (i = 0; i < SI_NUM_SHADERS; i++) {
+ if (!si_upload_descriptors(sctx, &sctx->const_buffers[i].desc) ||
+ !si_upload_descriptors(sctx, &sctx->rw_buffers[i].desc) ||
+ !si_upload_descriptors(sctx, &sctx->samplers[i].views.desc) ||
+ !si_upload_descriptors(sctx, &sctx->samplers[i].states.desc))
+ return false;
+ }
+ return si_upload_vertex_buffer_descriptors(sctx);
}
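A sketch of the intended draw-time call, matching the "skip the draw call" comment in si_upload_descriptors:

    /* Hypothetical use in the draw path: abort the draw if any
     * descriptor list failed to upload. */
    if (!si_upload_shader_descriptors(sctx))
            return;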
void si_release_all_descriptors(struct si_context *sctx)
@@ -1343,4 +1080,5 @@ void si_all_descriptors_begin_new_cs(struct si_context *sctx)
si_sampler_states_begin_new_cs(sctx, &sctx->samplers[i].states);
}
si_vertex_buffers_begin_new_cs(sctx);
+ si_shader_userdata_begin_new_cs(sctx);
}
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index 313ced7f5d1..307dc391431 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -30,10 +30,32 @@
void si_need_cs_space(struct si_context *ctx, unsigned num_dw,
boolean count_draw_in)
{
+ struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
int i;
+ /* If the CS is sufficiently large, don't count the space needed
+ * and just flush if fewer than 8192 dwords are left. */
+ if (cs->max_dw >= 24 * 1024) {
+ if (cs->cdw > cs->max_dw - 8 * 1024)
+ ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+ return;
+ }
+
+ /* There are two memory usage counters in the winsys for all buffers
+ * that have been added (cs_add_reloc) and two counters in the pipe
+ * driver for those that haven't been added yet.
+ */
+ if (!ctx->b.ws->cs_memory_below_limit(ctx->b.rings.gfx.cs, ctx->b.vram, ctx->b.gtt)) {
+ ctx->b.gtt = 0;
+ ctx->b.vram = 0;
+ ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+ return;
+ }
+ ctx->b.gtt = 0;
+ ctx->b.vram = 0;
+
/* The number of dwords we already used in the CS so far. */
- num_dw += ctx->b.rings.gfx.cs->cdw;
+ num_dw += cs->cdw;
if (count_draw_in) {
for (i = 0; i < SI_NUM_ATOMS(ctx); i++) {
@@ -50,7 +72,8 @@ void si_need_cs_space(struct si_context *ctx, unsigned num_dw,
}
/* Count in queries_suspend. */
- num_dw += ctx->b.num_cs_dw_nontimer_queries_suspend;
+ num_dw += ctx->b.num_cs_dw_nontimer_queries_suspend +
+ ctx->b.num_cs_dw_timer_queries_suspend;
/* Count in streamout_end at the end of CS. */
if (ctx->b.streamout.begin_emitted) {
@@ -72,7 +95,7 @@ void si_need_cs_space(struct si_context *ctx, unsigned num_dw,
#endif
/* Flush if there's not enough space. */
- if (num_dw > RADEON_MAX_CMDBUF_DWORDS) {
+ if (num_dw > cs->max_dw) {
ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
}
}
@@ -82,9 +105,16 @@ void si_context_gfx_flush(void *context, unsigned flags,
{
struct si_context *ctx = context;
struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
-
- if (cs->cdw == ctx->b.initial_gfx_cs_size && !fence)
+ struct radeon_winsys *ws = ctx->b.ws;
+
+ if (cs->cdw == ctx->b.initial_gfx_cs_size &&
+ (!fence || ctx->last_gfx_fence)) {
+ if (fence)
+ ws->fence_reference(fence, ctx->last_gfx_fence);
+ if (!(flags & RADEON_FLUSH_ASYNC))
+ ws->cs_sync_flush(cs);
return;
+ }
ctx->b.rings.gfx.flushing = true;
@@ -101,9 +131,13 @@ void si_context_gfx_flush(void *context, unsigned flags,
flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
/* Flush the CS. */
- ctx->b.ws->cs_flush(cs, flags, fence, ctx->screen->b.cs_count++);
+ ws->cs_flush(cs, flags, &ctx->last_gfx_fence,
+ ctx->screen->b.cs_count++);
ctx->b.rings.gfx.flushing = false;
+ if (fence)
+ ws->fence_reference(fence, ctx->last_gfx_fence);
+
#if SI_TRACE_CS
if (ctx->screen->b.trace_bo) {
struct si_screen *sscreen = ctx->screen;
@@ -111,7 +145,7 @@ void si_context_gfx_flush(void *context, unsigned flags,
for (i = 0; i < 10; i++) {
usleep(5);
- if (!ctx->b.ws->buffer_is_busy(sscreen->b.trace_bo->buf, RADEON_USAGE_READWRITE)) {
+ if (!ws->buffer_is_busy(sscreen->b.trace_bo->buf, RADEON_USAGE_READWRITE)) {
break;
}
}
@@ -130,7 +164,8 @@ void si_context_gfx_flush(void *context, unsigned flags,
void si_begin_new_cs(struct si_context *ctx)
{
/* Flush read caches at the beginning of CS. */
- ctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
+ ctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER |
+ SI_CONTEXT_INV_TC_L1 |
SI_CONTEXT_INV_TC_L2 |
SI_CONTEXT_INV_KCACHE |
SI_CONTEXT_INV_ICACHE;
@@ -143,24 +178,32 @@ void si_begin_new_cs(struct si_context *ctx)
/* The CS initialization should be emitted before everything else. */
si_pm4_emit(ctx, ctx->init_config);
- ctx->clip_regs.dirty = true;
- ctx->framebuffer.atom.dirty = true;
- ctx->msaa_sample_locs.dirty = true;
- ctx->msaa_config.dirty = true;
- ctx->db_render_state.dirty = true;
- ctx->b.streamout.enable_atom.dirty = true;
+ si_mark_atom_dirty(ctx, &ctx->clip_regs);
+ si_mark_atom_dirty(ctx, &ctx->framebuffer.atom);
+ si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs);
+ si_mark_atom_dirty(ctx, &ctx->msaa_config);
+ si_mark_atom_dirty(ctx, &ctx->db_render_state);
+ si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
si_all_descriptors_begin_new_cs(ctx);
r600_postflush_resume_features(&ctx->b);
ctx->b.initial_gfx_cs_size = ctx->b.rings.gfx.cs->cdw;
+
+ /* Invalidate various draw states so that they are emitted before
+ * the first draw call. */
si_invalidate_draw_sh_constants(ctx);
ctx->last_primitive_restart_en = -1;
ctx->last_restart_index = SI_RESTART_INDEX_UNKNOWN;
ctx->last_gs_out_prim = -1;
ctx->last_prim = -1;
ctx->last_multi_vgt_param = -1;
+ ctx->last_ls_hs_config = -1;
ctx->last_rast_prim = -1;
ctx->last_sc_line_stipple = ~0;
ctx->emit_scratch_reloc = true;
+ ctx->last_ls = NULL;
+ ctx->last_tcs = NULL;
+ ctx->last_tes_sh_base = -1;
+ ctx->last_num_tcs_input_cp = -1;
}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 53ae71a8c92..473a2e9ad12 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -36,32 +36,42 @@
static void si_destroy_context(struct pipe_context *context)
{
struct si_context *sctx = (struct si_context *)context;
+ int i;
si_release_all_descriptors(sctx);
pipe_resource_reference(&sctx->esgs_ring, NULL);
pipe_resource_reference(&sctx->gsvs_ring, NULL);
+ pipe_resource_reference(&sctx->tf_ring, NULL);
pipe_resource_reference(&sctx->null_const_buf.buffer, NULL);
r600_resource_reference(&sctx->border_color_table, NULL);
r600_resource_reference(&sctx->scratch_buffer, NULL);
+ sctx->b.ws->fence_reference(&sctx->last_gfx_fence, NULL);
si_pm4_free_state(sctx, sctx->init_config, ~0);
si_pm4_delete_state(sctx, gs_rings, sctx->gs_rings);
- si_pm4_delete_state(sctx, gs_onoff, sctx->gs_on);
- si_pm4_delete_state(sctx, gs_onoff, sctx->gs_off);
+ si_pm4_delete_state(sctx, tf_ring, sctx->tf_state);
+ for (i = 0; i < Elements(sctx->vgt_shader_config); i++)
+ si_pm4_delete_state(sctx, vgt_shader_config, sctx->vgt_shader_config[i]);
if (sctx->pstipple_sampler_state)
sctx->b.b.delete_sampler_state(&sctx->b.b, sctx->pstipple_sampler_state);
- if (sctx->dummy_pixel_shader) {
+ if (sctx->dummy_pixel_shader)
sctx->b.b.delete_fs_state(&sctx->b.b, sctx->dummy_pixel_shader);
- }
- sctx->b.b.delete_depth_stencil_alpha_state(&sctx->b.b, sctx->custom_dsa_flush);
- sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_resolve);
- sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_decompress);
- sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_fastclear);
+ if (sctx->fixed_func_tcs_shader)
+ sctx->b.b.delete_tcs_state(&sctx->b.b, sctx->fixed_func_tcs_shader);
+ if (sctx->custom_dsa_flush)
+ sctx->b.b.delete_depth_stencil_alpha_state(&sctx->b.b, sctx->custom_dsa_flush);
+ if (sctx->custom_blend_resolve)
+ sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_resolve);
+ if (sctx->custom_blend_decompress)
+ sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_decompress);
+ if (sctx->custom_blend_fastclear)
+ sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_fastclear);
util_unreference_framebuffer_state(&sctx->framebuffer.state);
- util_blitter_destroy(sctx->blitter);
+ if (sctx->blitter)
+ util_blitter_destroy(sctx->blitter);
si_pm4_cleanup(sctx);
@@ -74,6 +84,14 @@ static void si_destroy_context(struct pipe_context *context)
FREE(sctx);
}
+static enum pipe_reset_status
+si_amdgpu_get_reset_status(struct pipe_context *ctx)
+{
+ struct si_context *sctx = (struct si_context *)ctx;
+
+ return sctx->b.ws->ctx_query_reset_status(sctx->b.ctx);
+}
+
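
Since the callback is only installed when drm_major == 3 (the amdgpu winsys), callers have to treat it as optional. A hypothetical caller-side helper, assuming only the standard Gallium headers:

#include <stdbool.h>
#include "pipe/p_context.h"
#include "pipe/p_defines.h"

/* Returns true if the GPU context was lost; get_device_reset_status
 * is an optional hook, so check the pointer before calling it. */
static bool context_was_lost(struct pipe_context *pipe)
{
        if (!pipe->get_device_reset_status)
                return false;   /* driver can't tell; assume it is alive */
        return pipe->get_device_reset_status(pipe) != PIPE_NO_RESET;
}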
static struct pipe_context *si_create_context(struct pipe_screen *screen, void *priv)
{
struct si_context *sctx = CALLOC_STRUCT(si_context);
@@ -91,13 +109,18 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void *
sctx->b.b.screen = screen; /* this must be set first */
sctx->b.b.priv = priv;
sctx->b.b.destroy = si_destroy_context;
+ sctx->b.set_atom_dirty = (void *)si_set_atom_dirty;
sctx->screen = sscreen; /* Easy accessing of screen/winsys. */
if (!r600_common_context_init(&sctx->b, &sscreen->b))
goto fail;
+ if (sscreen->b.info.drm_major == 3)
+ sctx->b.b.get_device_reset_status = si_amdgpu_get_reset_status;
+
si_init_blit_functions(sctx);
si_init_compute_functions(sctx);
+ si_init_cp_dma_functions(sctx);
if (sscreen->b.info.has_uvd) {
sctx->b.b.create_video_codec = si_uvd_create_decoder;
@@ -107,7 +130,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void *
sctx->b.b.create_video_buffer = vl_video_buffer_create;
}
- sctx->b.rings.gfx.cs = ws->cs_create(ws, RING_GFX, si_context_gfx_flush,
+ sctx->b.rings.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX, si_context_gfx_flush,
sctx, sscreen->b.trace_bo ?
sscreen->b.trace_bo->cs_buf : NULL);
sctx->b.rings.gfx.flush = si_context_gfx_flush;
@@ -127,17 +150,8 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void *
sctx->atoms.s.streamout_begin = &sctx->b.streamout.begin_atom;
sctx->atoms.s.streamout_enable = &sctx->b.streamout.enable_atom;
- switch (sctx->b.chip_class) {
- case SI:
- case CIK:
- si_init_state_functions(sctx);
- si_init_shader_functions(sctx);
- si_init_config(sctx);
- break;
- default:
- R600_ERR("Unsupported chip class %d.\n", sctx->b.chip_class);
- goto fail;
- }
+ si_init_state_functions(sctx);
+ si_init_shader_functions(sctx);
if (sscreen->b.debug_flags & DBG_FORCE_DMA)
sctx->b.b.resource_copy_region = sctx->b.dma_copy;
@@ -181,7 +195,9 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void *
r600_target = radeon_llvm_get_r600_target(triple);
sctx->tm = LLVMCreateTargetMachine(r600_target, triple,
r600_get_llvm_processor_name(sscreen->b.family),
- "+DumpCode,+vgpr-spilling",
+ sctx->b.chip_class >= VI ?
+ "+DumpCode" :
+ "+DumpCode,+vgpr-spilling",
LLVMCodeGenLevelDefault,
LLVMRelocDefault,
LLVMCodeModelDefault);
@@ -252,15 +268,27 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
case PIPE_CAP_TGSI_TEXCOORD:
+ case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
+ case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
return 1;
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
return !SI_BIG_ENDIAN && sscreen->b.info.has_userptr;
+ case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ return (sscreen->b.info.drm_major == 2 &&
+ sscreen->b.info.drm_minor >= 43) ||
+ sscreen->b.info.drm_major == 3;
+
case PIPE_CAP_TEXTURE_MULTISAMPLE:
/* 2D tiling on CIK is supported since DRM 2.35.0 */
return sscreen->b.chip_class < CIK ||
- sscreen->b.info.drm_minor >= 35;
+ (sscreen->b.info.drm_major == 2 &&
+ sscreen->b.info.drm_minor >= 35) ||
+ sscreen->b.info.drm_major == 3;
case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
return R600_MAP_BUFFER_ALIGNMENT;
@@ -270,7 +298,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
return 4;
case PIPE_CAP_GLSL_FEATURE_LEVEL:
- return 330;
+ return HAVE_LLVM >= 0x0307 ? 410 : 330;
case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
return MIN2(sscreen->b.info.vram_size, 0xFFFFFFFF);
@@ -289,13 +317,13 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_USER_VERTEX_BUFFERS:
case PIPE_CAP_FAKE_SW_MSAA:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
- case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
- case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
case PIPE_CAP_SAMPLER_VIEW_TARGET:
case PIPE_CAP_VERTEXID_NOBASE:
- case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
return 0;
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ return 30;
+
case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_R600;
@@ -314,7 +342,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
return 4095;
case PIPE_CAP_MAX_VERTEX_STREAMS:
- return 1;
+ return 4;
case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
return 2048;
@@ -335,7 +363,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
return 8;
case PIPE_CAP_MAX_VIEWPORTS:
- return 1;
+ return 16;
/* Timer queries, present when the clock frequency is non zero. */
case PIPE_CAP_QUERY_TIMESTAMP:
@@ -375,6 +403,13 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
case PIPE_SHADER_VERTEX:
case PIPE_SHADER_GEOMETRY:
break;
+ case PIPE_SHADER_TESS_CTRL:
+ case PIPE_SHADER_TESS_EVAL:
+ /* Tessellation requires LLVM 3.6.2 or newer for its bug fixes */
+ if (HAVE_LLVM < 0x0306 ||
+ (HAVE_LLVM == 0x0306 && MESA_LLVM_VERSION_PATCH < 2))
+ return 0;
+ break;
case PIPE_SHADER_COMPUTE:
switch (param) {
case PIPE_SHADER_CAP_PREFERRED_IR:
@@ -401,7 +436,6 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
}
break;
default:
- /* TODO: support tessellation */
return 0;
}
@@ -433,7 +467,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
/* Indirection of geometry shader input dimension is not
* handled yet
*/
- return shader < PIPE_SHADER_GEOMETRY;
+ return shader != PIPE_SHADER_GEOMETRY;
case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
@@ -448,6 +482,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_DOUBLES:
+ return HAVE_LLVM >= 0x0307;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
return 0;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 2d67342f160..553e1f32683 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -48,7 +48,8 @@
#define SI_MAX_DRAW_CS_DWORDS \
(/*scratch:*/ 3 + /*derived prim state:*/ 3 + \
- /*draw regs:*/ 16 + /*draw packets:*/ 31)
+ /*draw regs:*/ 18 + /*draw packets:*/ 31 + \
+ /*derived tess state:*/ 19)
/* Instruction cache. */
#define SI_CONTEXT_INV_ICACHE (R600_CONTEXT_PRIVATE_FLAG << 0)
@@ -125,8 +126,6 @@ struct si_framebuffer {
#define SI_NUM_ATOMS(sctx) (sizeof((sctx)->atoms)/sizeof((sctx)->atoms.array[0]))
-#define SI_NUM_SHADERS (PIPE_SHADER_GEOMETRY+1)
-
struct si_context {
struct r600_common_context b;
struct blitter_context *blitter;
@@ -137,17 +136,12 @@ struct si_context {
void *pstipple_sampler_state;
struct si_screen *screen;
struct si_pm4_state *init_config;
+ struct pipe_fence_handle *last_gfx_fence;
+ struct si_shader_selector *fixed_func_tcs_shader;
union {
struct {
/* The order matters. */
- struct r600_atom *vertex_buffers;
- struct r600_atom *const_buffers[SI_NUM_SHADERS];
- struct r600_atom *rw_buffers[SI_NUM_SHADERS];
- struct r600_atom *sampler_views[SI_NUM_SHADERS];
- struct r600_atom *sampler_states[SI_NUM_SHADERS];
- /* Caches must be flushed after resource descriptors are
- * updated in memory. */
struct r600_atom *cache_flush;
struct r600_atom *streamout_begin;
struct r600_atom *streamout_enable; /* must be after streamout_begin */
@@ -156,6 +150,7 @@ struct si_context {
struct r600_atom *db_render_state;
struct r600_atom *msaa_config;
struct r600_atom *clip_regs;
+ struct r600_atom *shader_userdata;
} s;
struct r600_atom *array[0];
} atoms;
@@ -168,7 +163,10 @@ struct si_context {
struct si_shader_selector *ps_shader;
struct si_shader_selector *gs_shader;
struct si_shader_selector *vs_shader;
+ struct si_shader_selector *tcs_shader;
+ struct si_shader_selector *tes_shader;
struct si_cs_shader_state cs_shader_state;
+ struct si_shader_data shader_userdata;
/* shader information */
unsigned sprite_coord_enable;
bool flatshade;
@@ -194,13 +192,16 @@ struct si_context {
/* With rasterizer discard, there doesn't have to be a pixel shader.
* In that case, we bind this one: */
void *dummy_pixel_shader;
- struct si_pm4_state *gs_on;
- struct si_pm4_state *gs_off;
- struct si_pm4_state *gs_rings;
struct r600_atom cache_flush;
struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on CIK */
+
+ /* VGT states. */
+ struct si_pm4_state *vgt_shader_config[4];
+ struct si_pm4_state *gs_rings;
struct pipe_resource *esgs_ring;
struct pipe_resource *gsvs_ring;
+ struct si_pm4_state *tf_state;
+ struct pipe_resource *tf_ring;
LLVMTargetMachineRef tm;
@@ -218,7 +219,7 @@ struct si_context {
bool db_depth_disable_expclear;
unsigned ps_db_shader_control;
- /* Draw state. */
+ /* Emitted draw state. */
int last_base_vertex;
int last_start_instance;
int last_sh_base_reg;
@@ -227,6 +228,7 @@ struct si_context {
int last_gs_out_prim;
int last_prim;
int last_multi_vgt_param;
+ int last_ls_hs_config;
int last_rast_prim;
unsigned last_sc_line_stipple;
int current_rast_prim; /* primitive type after TES, GS */
@@ -235,6 +237,12 @@ struct si_context {
boolean emit_scratch_reloc;
unsigned scratch_waves;
unsigned spi_tmpring_size;
+
+ /* Emitted derived tessellation state. */
+ struct si_shader *last_ls; /* local shader (VS) */
+ struct si_shader_selector *last_tcs;
+ int last_num_tcs_input_cp;
+ int last_tes_sh_base;
};
/* cik_sdma.c */
@@ -260,6 +268,13 @@ void si_resource_copy_region(struct pipe_context *ctx,
unsigned src_level,
const struct pipe_box *src_box);
+/* si_cp_dma.c */
+void si_copy_buffer(struct si_context *sctx,
+ struct pipe_resource *dst, struct pipe_resource *src,
+ uint64_t dst_offset, uint64_t src_offset, unsigned size,
+ bool is_framebuffer);
+void si_init_cp_dma_functions(struct si_context *sctx);
+
/* si_dma.c */
void si_dma_copy(struct pipe_context *ctx,
struct pipe_resource *dst,
@@ -293,7 +308,7 @@ struct pipe_video_buffer *si_video_buffer_create(struct pipe_context *pipe,
* common helpers
*/
-static INLINE struct r600_resource *
+static inline struct r600_resource *
si_resource_create_custom(struct pipe_screen *screen,
unsigned usage, unsigned size)
{
@@ -302,7 +317,7 @@ si_resource_create_custom(struct pipe_screen *screen,
PIPE_BIND_CUSTOM, usage, size));
}
-static INLINE void
+static inline void
si_invalidate_draw_sh_constants(struct si_context *sctx)
{
sctx->last_base_vertex = SI_BASE_VERTEX_UNKNOWN;
@@ -310,4 +325,18 @@ si_invalidate_draw_sh_constants(struct si_context *sctx)
sctx->last_sh_base_reg = -1; /* reset to an unknown value */
}
+static inline void
+si_set_atom_dirty(struct si_context *sctx,
+ struct r600_atom *atom, bool dirty)
+{
+ atom->dirty = dirty;
+}
+
+static inline void
+si_mark_atom_dirty(struct si_context *sctx,
+ struct r600_atom *atom)
+{
+ si_set_atom_dirty(sctx, atom, true);
+}
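
The one-line wrapper above looks redundant, but it gives the driver a single funnel point for dirtying atoms. A hedged sketch of the kind of bookkeeping that becomes possible once every call site goes through the helper (the dirty counter is invented for illustration):

#include <stdbool.h>

struct atom { bool dirty; };
struct ctx_sketch { struct atom clip_regs; unsigned num_dirty; };

static void mark_atom_dirty_sketch(struct ctx_sketch *ctx, struct atom *a)
{
        if (!a->dirty)
                ctx->num_dirty++;       /* centralized bookkeeping */
        a->dirty = true;
}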
+
#endif
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 47e5f96cbed..4288e9b2ab1 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -31,6 +31,7 @@
#include "gallivm/lp_bld_intr.h"
#include "gallivm/lp_bld_logic.h"
#include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_bitarit.h"
#include "gallivm/lp_bld_flow.h"
#include "radeon/r600_cs.h"
#include "radeon/radeon_llvm.h"
@@ -71,18 +72,25 @@ struct si_shader_context
int param_streamout_write_index;
int param_streamout_offset[4];
int param_vertex_id;
+ int param_rel_auto_id;
+ int param_vs_prim_id;
int param_instance_id;
+ int param_tes_u;
+ int param_tes_v;
+ int param_tes_rel_patch_id;
+ int param_tes_patch_id;
+ int param_es2gs_offset;
LLVMTargetMachineRef tm;
LLVMValueRef const_md;
LLVMValueRef const_resource[SI_NUM_CONST_BUFFERS];
- LLVMValueRef ddxy_lds;
+ LLVMValueRef lds;
LLVMValueRef *constants[SI_NUM_CONST_BUFFERS];
LLVMValueRef resources[SI_NUM_SAMPLER_VIEWS];
LLVMValueRef samplers[SI_NUM_SAMPLER_STATES];
LLVMValueRef so_buffers[4];
LLVMValueRef esgs_ring;
- LLVMValueRef gsvs_ring;
- LLVMValueRef gs_next_vertex;
+ LLVMValueRef gsvs_ring[4];
+ LLVMValueRef gs_next_vertex[4];
};
static struct si_shader_context * si_shader_context(
@@ -129,12 +137,29 @@ unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
assert(index <= 1);
return 2 + index;
case TGSI_SEMANTIC_GENERIC:
- assert(index <= 63-4);
- return 4 + index;
+ if (index <= 63-4)
+ return 4 + index;
+ else
+ /* Same explanation as in the default case;
+ * the only user hitting this is st/nine.
+ */
+ return 0;
+
+ /* patch indices are completely separate and thus start from 0 */
+ case TGSI_SEMANTIC_TESSOUTER:
+ return 0;
+ case TGSI_SEMANTIC_TESSINNER:
+ return 1;
+ case TGSI_SEMANTIC_PATCH:
+ return 2 + index;
default:
- assert(0);
- return 63;
+ /* Don't fail here. The result of this function is only used
+ * for LS, TCS, TES, and GS, where legacy GL semantics can't
+ * occur, but this function is called for all vertex shaders
+ * before it's known whether LS will be compiled or not.
+ */
+ return 0;
}
}
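
A worked example of the slot assignment this function produces; per-vertex and per-patch indices deliberately overlap because they address separate spaces, and each index is later multiplied by 4 to form a dword offset:

/* Illustrative values of si_shader_io_get_unique_index():
 *
 *   per-vertex:  GENERIC[0] -> 4,  GENERIC[1] -> 5, ... GENERIC[59] -> 63
 *   per-patch:   TESSOUTER -> 0,  TESSINNER -> 1,  PATCH[3] -> 5
 */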
@@ -205,6 +230,136 @@ static LLVMValueRef unpack_param(struct si_shader_context *si_shader_ctx,
return value;
}
+static LLVMValueRef get_rel_patch_id(struct si_shader_context *si_shader_ctx)
+{
+ switch (si_shader_ctx->type) {
+ case TGSI_PROCESSOR_TESS_CTRL:
+ return unpack_param(si_shader_ctx, SI_PARAM_REL_IDS, 0, 8);
+
+ case TGSI_PROCESSOR_TESS_EVAL:
+ return LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+ si_shader_ctx->param_tes_rel_patch_id);
+
+ default:
+ assert(0);
+ return NULL;
+ }
+}
+
+/* Tessellation shaders pass outputs to the next shader using LDS.
+ *
+ * LS outputs = TCS inputs
+ * TCS outputs = TES inputs
+ *
+ * The LDS layout is:
+ * - TCS inputs for patch 0
+ * - TCS inputs for patch 1
+ * - TCS inputs for patch 2 = get_tcs_in_current_patch_offset (if RelPatchID==2)
+ * - ...
+ * - TCS outputs for patch 0 = get_tcs_out_patch0_offset
+ * - Per-patch TCS outputs for patch 0 = get_tcs_out_patch0_patch_data_offset
+ * - TCS outputs for patch 1
+ * - Per-patch TCS outputs for patch 1
+ * - TCS outputs for patch 2 = get_tcs_out_current_patch_offset (if RelPatchID==2)
+ * - Per-patch TCS outputs for patch 2 = get_tcs_out_current_patch_data_offset (if RelPatchID==2)
+ * - ...
+ *
+ * All three shaders VS(LS), TCS, TES share the same LDS space.
+ */
+
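
A hedged numeric sketch of the layout described above; every stride and base here is a made-up example value, not a hardware constant:

static void example_tcs_lds_offsets(unsigned *in_cur, unsigned *out_cur)
{
        unsigned in_stride  = 16;  /* dwords of TCS inputs per patch  */
        unsigned out_stride = 24;  /* dwords of TCS outputs per patch */
        unsigned out_base   = 256; /* dword offset of the output area */
        unsigned rel_patch  = 2;   /* RelPatchID of this patch        */

        *in_cur  = rel_patch * in_stride;              /* dword 32  */
        *out_cur = out_base + rel_patch * out_stride;  /* dword 304 */
}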
+static LLVMValueRef
+get_tcs_in_patch_stride(struct si_shader_context *si_shader_ctx)
+{
+ if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX)
+ return unpack_param(si_shader_ctx, SI_PARAM_LS_OUT_LAYOUT, 0, 13);
+ else if (si_shader_ctx->type == TGSI_PROCESSOR_TESS_CTRL)
+ return unpack_param(si_shader_ctx, SI_PARAM_TCS_IN_LAYOUT, 0, 13);
+ else {
+ assert(0);
+ return NULL;
+ }
+}
+
+static LLVMValueRef
+get_tcs_out_patch_stride(struct si_shader_context *si_shader_ctx)
+{
+ return unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_LAYOUT, 0, 13);
+}
+
+static LLVMValueRef
+get_tcs_out_patch0_offset(struct si_shader_context *si_shader_ctx)
+{
+ return lp_build_mul_imm(&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld,
+ unpack_param(si_shader_ctx,
+ SI_PARAM_TCS_OUT_OFFSETS,
+ 0, 16),
+ 4);
+}
+
+static LLVMValueRef
+get_tcs_out_patch0_patch_data_offset(struct si_shader_context *si_shader_ctx)
+{
+ return lp_build_mul_imm(&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld,
+ unpack_param(si_shader_ctx,
+ SI_PARAM_TCS_OUT_OFFSETS,
+ 16, 16),
+ 4);
+}
+
+static LLVMValueRef
+get_tcs_in_current_patch_offset(struct si_shader_context *si_shader_ctx)
+{
+ struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
+ LLVMValueRef patch_stride = get_tcs_in_patch_stride(si_shader_ctx);
+ LLVMValueRef rel_patch_id = get_rel_patch_id(si_shader_ctx);
+
+ return LLVMBuildMul(gallivm->builder, patch_stride, rel_patch_id, "");
+}
+
+static LLVMValueRef
+get_tcs_out_current_patch_offset(struct si_shader_context *si_shader_ctx)
+{
+ struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
+ LLVMValueRef patch0_offset = get_tcs_out_patch0_offset(si_shader_ctx);
+ LLVMValueRef patch_stride = get_tcs_out_patch_stride(si_shader_ctx);
+ LLVMValueRef rel_patch_id = get_rel_patch_id(si_shader_ctx);
+
+ return LLVMBuildAdd(gallivm->builder, patch0_offset,
+ LLVMBuildMul(gallivm->builder, patch_stride,
+ rel_patch_id, ""),
+ "");
+}
+
+static LLVMValueRef
+get_tcs_out_current_patch_data_offset(struct si_shader_context *si_shader_ctx)
+{
+ struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
+ LLVMValueRef patch0_patch_data_offset =
+ get_tcs_out_patch0_patch_data_offset(si_shader_ctx);
+ LLVMValueRef patch_stride = get_tcs_out_patch_stride(si_shader_ctx);
+ LLVMValueRef rel_patch_id = get_rel_patch_id(si_shader_ctx);
+
+ return LLVMBuildAdd(gallivm->builder, patch0_patch_data_offset,
+ LLVMBuildMul(gallivm->builder, patch_stride,
+ rel_patch_id, ""),
+ "");
+}
+
+static void build_indexed_store(struct si_shader_context *si_shader_ctx,
+ LLVMValueRef base_ptr, LLVMValueRef index,
+ LLVMValueRef value)
+{
+ struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMValueRef indices[2], pointer;
+
+ indices[0] = bld_base->uint_bld.zero;
+ indices[1] = index;
+
+ pointer = LLVMBuildGEP(gallivm->builder, base_ptr, indices, 2, "");
+ LLVMBuildStore(gallivm->builder, value, pointer);
+}
+
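
In plain C terms, build_indexed_store() emits a GEP with indices {0, index} followed by a store, which corresponds to dereferencing a pointer-to-array and then indexing into it. A small analogy (array size chosen arbitrarily):

static void indexed_store_analogy(int (*base_ptr)[64], unsigned index,
                                  int value)
{
        (*base_ptr)[index] = value;   /* GEP {0, index} + store */
}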
/**
* Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad.
* It's equivalent to doing a load from &base_ptr[index].
@@ -308,7 +463,7 @@ static void declare_input_vs(
args[0] = t_list;
args[1] = attribute_offset;
args[2] = buffer_index;
- input = build_intrinsic(gallivm->builder,
+ input = lp_build_intrinsic(gallivm->builder,
"llvm.SI.vs.load.input", vec4_type, args, 3,
LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
@@ -323,6 +478,285 @@ static void declare_input_vs(
}
}
+static LLVMValueRef get_primitive_id(struct lp_build_tgsi_context *bld_base,
+ unsigned swizzle)
+{
+ struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+
+ if (swizzle > 0)
+ return bld_base->uint_bld.zero;
+
+ switch (si_shader_ctx->type) {
+ case TGSI_PROCESSOR_VERTEX:
+ return LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+ si_shader_ctx->param_vs_prim_id);
+ case TGSI_PROCESSOR_TESS_CTRL:
+ return LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+ SI_PARAM_PATCH_ID);
+ case TGSI_PROCESSOR_TESS_EVAL:
+ return LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+ si_shader_ctx->param_tes_patch_id);
+ case TGSI_PROCESSOR_GEOMETRY:
+ return LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+ SI_PARAM_PRIMITIVE_ID);
+ default:
+ assert(0);
+ return bld_base->uint_bld.zero;
+ }
+}
+
+/**
+ * Return the value of tgsi_ind_register for indexing.
+ * This is the indirect index with the constant offset added to it.
+ */
+static LLVMValueRef get_indirect_index(struct si_shader_context *si_shader_ctx,
+ const struct tgsi_ind_register *ind,
+ int rel_index)
+{
+ struct gallivm_state *gallivm = si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm;
+ LLVMValueRef result;
+
+ result = si_shader_ctx->radeon_bld.soa.addr[ind->Index][ind->Swizzle];
+ result = LLVMBuildLoad(gallivm->builder, result, "");
+ result = LLVMBuildAdd(gallivm->builder, result,
+ lp_build_const_int32(gallivm, rel_index), "");
+ return result;
+}
+
+/**
+ * Calculate a dword address given an input or output register and a stride.
+ */
+static LLVMValueRef get_dw_address(struct si_shader_context *si_shader_ctx,
+ const struct tgsi_full_dst_register *dst,
+ const struct tgsi_full_src_register *src,
+ LLVMValueRef vertex_dw_stride,
+ LLVMValueRef base_addr)
+{
+ struct gallivm_state *gallivm = si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm;
+ struct tgsi_shader_info *info = &si_shader_ctx->shader->selector->info;
+ ubyte *name, *index, *array_first;
+ int first, param;
+ struct tgsi_full_dst_register reg;
+
+ /* Set the register description. The address computation is the same
+ * for sources and destinations. */
+ if (src) {
+ reg.Register.File = src->Register.File;
+ reg.Register.Index = src->Register.Index;
+ reg.Register.Indirect = src->Register.Indirect;
+ reg.Register.Dimension = src->Register.Dimension;
+ reg.Indirect = src->Indirect;
+ reg.Dimension = src->Dimension;
+ reg.DimIndirect = src->DimIndirect;
+ } else
+ reg = *dst;
+
+ /* If the register is 2-dimensional (e.g. an array of vertices
+ * in a primitive), calculate the base address of the vertex. */
+ if (reg.Register.Dimension) {
+ LLVMValueRef index;
+
+ if (reg.Dimension.Indirect)
+ index = get_indirect_index(si_shader_ctx, &reg.DimIndirect,
+ reg.Dimension.Index);
+ else
+ index = lp_build_const_int32(gallivm, reg.Dimension.Index);
+
+ base_addr = LLVMBuildAdd(gallivm->builder, base_addr,
+ LLVMBuildMul(gallivm->builder, index,
+ vertex_dw_stride, ""), "");
+ }
+
+ /* Get information about the register. */
+ if (reg.Register.File == TGSI_FILE_INPUT) {
+ name = info->input_semantic_name;
+ index = info->input_semantic_index;
+ array_first = info->input_array_first;
+ } else if (reg.Register.File == TGSI_FILE_OUTPUT) {
+ name = info->output_semantic_name;
+ index = info->output_semantic_index;
+ array_first = info->output_array_first;
+ } else {
+ assert(0);
+ return NULL;
+ }
+
+ if (reg.Register.Indirect) {
+ /* Add the relative address of the element. */
+ LLVMValueRef ind_index;
+
+ if (reg.Indirect.ArrayID)
+ first = array_first[reg.Indirect.ArrayID];
+ else
+ first = reg.Register.Index;
+
+ ind_index = get_indirect_index(si_shader_ctx, &reg.Indirect,
+ reg.Register.Index - first);
+
+ base_addr = LLVMBuildAdd(gallivm->builder, base_addr,
+ LLVMBuildMul(gallivm->builder, ind_index,
+ lp_build_const_int32(gallivm, 4), ""), "");
+
+ param = si_shader_io_get_unique_index(name[first], index[first]);
+ } else {
+ param = si_shader_io_get_unique_index(name[reg.Register.Index],
+ index[reg.Register.Index]);
+ }
+
+ /* Add the base address of the element. */
+ return LLVMBuildAdd(gallivm->builder, base_addr,
+ lp_build_const_int32(gallivm, param * 4), "");
+}
+
+/**
+ * Load from LDS.
+ *
+ * \param type output value type
+ * \param swizzle offset (typically 0..3); it can be ~0, which loads a vec4
+ * \param dw_addr address in dwords
+ */
+static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base,
+ enum tgsi_opcode_type type, unsigned swizzle,
+ LLVMValueRef dw_addr)
+{
+ struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMValueRef value;
+
+ if (swizzle == ~0) {
+ LLVMValueRef values[TGSI_NUM_CHANNELS];
+
+ for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++)
+ values[chan] = lds_load(bld_base, type, chan, dw_addr);
+
+ return lp_build_gather_values(bld_base->base.gallivm, values,
+ TGSI_NUM_CHANNELS);
+ }
+
+ dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
+ lp_build_const_int32(gallivm, swizzle));
+
+ value = build_indexed_load(si_shader_ctx, si_shader_ctx->lds, dw_addr);
+ return LLVMBuildBitCast(gallivm->builder, value,
+ tgsi2llvmtype(bld_base, type), "");
+}
+
+/**
+ * Store to LDS.
+ *
+ * \param swizzle offset (typically 0..3)
+ * \param dw_addr address in dwords
+ * \param value value to store
+ */
+static void lds_store(struct lp_build_tgsi_context * bld_base,
+ unsigned swizzle, LLVMValueRef dw_addr,
+ LLVMValueRef value)
+{
+ struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+
+ dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
+ lp_build_const_int32(gallivm, swizzle));
+
+ value = LLVMBuildBitCast(gallivm->builder, value,
+ LLVMInt32TypeInContext(gallivm->context), "");
+ build_indexed_store(si_shader_ctx, si_shader_ctx->lds,
+ dw_addr, value);
+}
+
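
Both helpers share one addressing rule: channel n of a register lives at dw_addr + n, and a swizzle of ~0 in lds_load() simply gathers all four channels. A scalar model of that rule (this mimics the arithmetic, not the LLVM IR path):

static void lds_load_vec4_model(const unsigned lds[], unsigned dw_addr,
                                unsigned out[4])
{
        for (unsigned chan = 0; chan < 4; chan++)
                out[chan] = lds[dw_addr + chan];
}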
+static LLVMValueRef fetch_input_tcs(
+ struct lp_build_tgsi_context *bld_base,
+ const struct tgsi_full_src_register *reg,
+ enum tgsi_opcode_type type, unsigned swizzle)
+{
+ struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+ LLVMValueRef dw_addr, stride;
+
+ stride = unpack_param(si_shader_ctx, SI_PARAM_TCS_IN_LAYOUT, 13, 8);
+ dw_addr = get_tcs_in_current_patch_offset(si_shader_ctx);
+ dw_addr = get_dw_address(si_shader_ctx, NULL, reg, stride, dw_addr);
+
+ return lds_load(bld_base, type, swizzle, dw_addr);
+}
+
+static LLVMValueRef fetch_output_tcs(
+ struct lp_build_tgsi_context *bld_base,
+ const struct tgsi_full_src_register *reg,
+ enum tgsi_opcode_type type, unsigned swizzle)
+{
+ struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+ LLVMValueRef dw_addr, stride;
+
+ if (reg->Register.Dimension) {
+ stride = unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_LAYOUT, 13, 8);
+ dw_addr = get_tcs_out_current_patch_offset(si_shader_ctx);
+ dw_addr = get_dw_address(si_shader_ctx, NULL, reg, stride, dw_addr);
+ } else {
+ dw_addr = get_tcs_out_current_patch_data_offset(si_shader_ctx);
+ dw_addr = get_dw_address(si_shader_ctx, NULL, reg, NULL, dw_addr);
+ }
+
+ return lds_load(bld_base, type, swizzle, dw_addr);
+}
+
+static LLVMValueRef fetch_input_tes(
+ struct lp_build_tgsi_context *bld_base,
+ const struct tgsi_full_src_register *reg,
+ enum tgsi_opcode_type type, unsigned swizzle)
+{
+ struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+ LLVMValueRef dw_addr, stride;
+
+ if (reg->Register.Dimension) {
+ stride = unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_LAYOUT, 13, 8);
+ dw_addr = get_tcs_out_current_patch_offset(si_shader_ctx);
+ dw_addr = get_dw_address(si_shader_ctx, NULL, reg, stride, dw_addr);
+ } else {
+ dw_addr = get_tcs_out_current_patch_data_offset(si_shader_ctx);
+ dw_addr = get_dw_address(si_shader_ctx, NULL, reg, NULL, dw_addr);
+ }
+
+ return lds_load(bld_base, type, swizzle, dw_addr);
+}
+
+static void store_output_tcs(struct lp_build_tgsi_context * bld_base,
+ const struct tgsi_full_instruction * inst,
+ const struct tgsi_opcode_info * info,
+ LLVMValueRef dst[4])
+{
+ struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+ const struct tgsi_full_dst_register *reg = &inst->Dst[0];
+ unsigned chan_index;
+ LLVMValueRef dw_addr, stride;
+
+ /* Only handle per-patch and per-vertex outputs here.
+ * Vectors will be lowered to scalars and this function will be called again.
+ */
+ if (reg->Register.File != TGSI_FILE_OUTPUT ||
+ (dst[0] && LLVMGetTypeKind(LLVMTypeOf(dst[0])) == LLVMVectorTypeKind)) {
+ radeon_llvm_emit_store(bld_base, inst, info, dst);
+ return;
+ }
+
+ if (reg->Register.Dimension) {
+ stride = unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_LAYOUT, 13, 8);
+ dw_addr = get_tcs_out_current_patch_offset(si_shader_ctx);
+ dw_addr = get_dw_address(si_shader_ctx, reg, NULL, stride, dw_addr);
+ } else {
+ dw_addr = get_tcs_out_current_patch_data_offset(si_shader_ctx);
+ dw_addr = get_dw_address(si_shader_ctx, reg, NULL, NULL, dw_addr);
+ }
+
+ TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) {
+ LLVMValueRef value = dst[chan_index];
+
+ if (inst->Instruction.Saturate)
+ value = radeon_llvm_saturate(bld_base, value);
+
+ lds_store(bld_base, chan_index, dw_addr, value);
+ }
+}
+
static LLVMValueRef fetch_input_gs(
struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_src_register *reg,
@@ -342,13 +776,8 @@ static LLVMValueRef fetch_input_gs(
unsigned semantic_name = info->input_semantic_name[reg->Register.Index];
unsigned semantic_index = info->input_semantic_index[reg->Register.Index];
- if (swizzle != ~0 && semantic_name == TGSI_SEMANTIC_PRIMID) {
- if (swizzle == 0)
- return LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
- SI_PARAM_PRIMITIVE_ID);
- else
- return uint->zero;
- }
+ if (swizzle != ~0 && semantic_name == TGSI_SEMANTIC_PRIMID)
+ return get_primitive_id(bld_base, swizzle);
if (!reg->Register.Dimension)
return NULL;
@@ -380,7 +809,7 @@ static LLVMValueRef fetch_input_gs(
args[1] = vtx_offset;
args[2] = lp_build_const_int32(gallivm,
(get_param_index(semantic_name, semantic_index,
- shader->selector->gs_used_inputs) * 4 +
+ shader->selector->inputs_read) * 4 +
swizzle) * 256);
args[3] = uint->zero;
args[4] = uint->one; /* OFFEN */
@@ -390,13 +819,42 @@ static LLVMValueRef fetch_input_gs(
args[8] = uint->zero; /* TFE */
return LLVMBuildBitCast(gallivm->builder,
- build_intrinsic(gallivm->builder,
+ lp_build_intrinsic(gallivm->builder,
"llvm.SI.buffer.load.dword.i32.i32",
i32, args, 9,
LLVMReadOnlyAttribute | LLVMNoUnwindAttribute),
tgsi2llvmtype(bld_base, type), "");
}
+static int lookup_interp_param_index(unsigned interpolate, unsigned location)
+{
+ switch (interpolate) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ return 0;
+
+ case TGSI_INTERPOLATE_LINEAR:
+ if (location == TGSI_INTERPOLATE_LOC_SAMPLE)
+ return SI_PARAM_LINEAR_SAMPLE;
+ else if (location == TGSI_INTERPOLATE_LOC_CENTROID)
+ return SI_PARAM_LINEAR_CENTROID;
+ else
+ return SI_PARAM_LINEAR_CENTER;
+ case TGSI_INTERPOLATE_COLOR:
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ if (location == TGSI_INTERPOLATE_LOC_SAMPLE)
+ return SI_PARAM_PERSP_SAMPLE;
+ else if (location == TGSI_INTERPOLATE_LOC_CENTROID)
+ return SI_PARAM_PERSP_CENTROID;
+ else
+ return SI_PARAM_PERSP_CENTER;
+ default:
+ fprintf(stderr, "Warning: Unhandled interpolation mode.\n");
+ return -1;
+ }
+}
+
static void declare_input_fs(
struct radeon_llvm_context *radeon_bld,
unsigned input_index,
@@ -411,7 +869,8 @@ static void declare_input_fs(
LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context);
LLVMValueRef main_fn = radeon_bld->main_fn;
- LLVMValueRef interp_param;
+ LLVMValueRef interp_param = NULL;
+ int interp_param_idx;
const char * intr_name;
/* This value is:
@@ -460,31 +919,13 @@ static void declare_input_fs(
attr_number = lp_build_const_int32(gallivm,
shader->ps_input_param_offset[input_index]);
- switch (decl->Interp.Interpolate) {
- case TGSI_INTERPOLATE_CONSTANT:
- interp_param = 0;
- break;
- case TGSI_INTERPOLATE_LINEAR:
- if (decl->Interp.Location == TGSI_INTERPOLATE_LOC_SAMPLE)
- interp_param = LLVMGetParam(main_fn, SI_PARAM_LINEAR_SAMPLE);
- else if (decl->Interp.Location == TGSI_INTERPOLATE_LOC_CENTROID)
- interp_param = LLVMGetParam(main_fn, SI_PARAM_LINEAR_CENTROID);
- else
- interp_param = LLVMGetParam(main_fn, SI_PARAM_LINEAR_CENTER);
- break;
- case TGSI_INTERPOLATE_COLOR:
- case TGSI_INTERPOLATE_PERSPECTIVE:
- if (decl->Interp.Location == TGSI_INTERPOLATE_LOC_SAMPLE)
- interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_SAMPLE);
- else if (decl->Interp.Location == TGSI_INTERPOLATE_LOC_CENTROID)
- interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_CENTROID);
- else
- interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_CENTER);
- break;
- default:
- fprintf(stderr, "Warning: Unhandled interpolation mode.\n");
+ shader->ps_input_interpolate[input_index] = decl->Interp.Interpolate;
+ interp_param_idx = lookup_interp_param_index(decl->Interp.Interpolate,
+ decl->Interp.Location);
+ if (interp_param_idx == -1)
return;
- }
+ else if (interp_param_idx)
+ interp_param = LLVMGetParam(main_fn, interp_param_idx);
/* fs.constant returns the param from the middle vertex, so it's not
* really useful for flat shading. It's meant to be used for custom
@@ -522,12 +963,12 @@ static void declare_input_fs(
args[0] = llvm_chan;
args[1] = attr_number;
- front = build_intrinsic(gallivm->builder, intr_name,
+ front = lp_build_intrinsic(gallivm->builder, intr_name,
input_type, args, args[3] ? 4 : 3,
LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
args[1] = back_attr_number;
- back = build_intrinsic(gallivm->builder, intr_name,
+ back = lp_build_intrinsic(gallivm->builder, intr_name,
input_type, args, args[3] ? 4 : 3,
LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
@@ -548,7 +989,7 @@ static void declare_input_fs(
args[2] = params;
args[3] = interp_param;
radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 0)] =
- build_intrinsic(gallivm->builder, intr_name,
+ lp_build_intrinsic(gallivm->builder, intr_name,
input_type, args, args[3] ? 4 : 3,
LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 1)] =
@@ -566,7 +1007,7 @@ static void declare_input_fs(
args[2] = params;
args[3] = interp_param;
radeon_bld->inputs[soa_index] =
- build_intrinsic(gallivm->builder, intr_name,
+ lp_build_intrinsic(gallivm->builder, intr_name,
input_type, args, args[3] ? 4 : 3,
LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
}
@@ -587,10 +1028,35 @@ static LLVMValueRef buffer_load_const(LLVMBuilderRef builder, LLVMValueRef resou
{
LLVMValueRef args[2] = {resource, offset};
- return build_intrinsic(builder, "llvm.SI.load.const", return_type, args, 2,
+ return lp_build_intrinsic(builder, "llvm.SI.load.const", return_type, args, 2,
LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
}
+static LLVMValueRef load_sample_position(struct radeon_llvm_context *radeon_bld, LLVMValueRef sample_id)
+{
+ struct si_shader_context *si_shader_ctx =
+ si_shader_context(&radeon_bld->soa.bld_base);
+ struct lp_build_context *uint_bld = &radeon_bld->soa.bld_base.uint_bld;
+ struct gallivm_state *gallivm = &radeon_bld->gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef desc = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
+ LLVMValueRef buf_index = lp_build_const_int32(gallivm, SI_DRIVER_STATE_CONST_BUF);
+ LLVMValueRef resource = build_indexed_load_const(si_shader_ctx, desc, buf_index);
+
+ /* offset = sample_id * 8 (8 = 2 floats containing samplepos.xy) */
+ LLVMValueRef offset0 = lp_build_mul_imm(uint_bld, sample_id, 8);
+ LLVMValueRef offset1 = LLVMBuildAdd(builder, offset0, lp_build_const_int32(gallivm, 4), "");
+
+ LLVMValueRef pos[4] = {
+ buffer_load_const(builder, resource, offset0, radeon_bld->soa.bld_base.base.elem_type),
+ buffer_load_const(builder, resource, offset1, radeon_bld->soa.bld_base.base.elem_type),
+ lp_build_const_float(gallivm, 0),
+ lp_build_const_float(gallivm, 0)
+ };
+
+ return lp_build_gather_values(gallivm, pos, 4);
+}
+
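
Worked example of the addressing above: with two floats per sample, sample N's x lives at byte 8*N and its y at 8*N + 4, so sample 3 reads bytes 24 and 28:

static void sample_pos_offsets(unsigned sample_id,
                               unsigned *x_off, unsigned *y_off)
{
        *x_off = sample_id * 8;   /* samplepos.x */
        *y_off = *x_off + 4;      /* samplepos.y */
}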
static void declare_system_value(
struct radeon_llvm_context * radeon_bld,
unsigned index,
@@ -598,6 +1064,7 @@ static void declare_system_value(
{
struct si_shader_context *si_shader_ctx =
si_shader_context(&radeon_bld->soa.bld_base);
+ struct lp_build_context *bld = &radeon_bld->soa.bld_base.base;
struct lp_build_context *uint_bld = &radeon_bld->soa.bld_base.uint_bld;
struct gallivm_state *gallivm = &radeon_bld->gallivm;
LLVMValueRef value = 0;
@@ -626,30 +1093,23 @@ static void declare_system_value(
SI_PARAM_BASE_VERTEX);
break;
+ case TGSI_SEMANTIC_INVOCATIONID:
+ if (si_shader_ctx->type == TGSI_PROCESSOR_TESS_CTRL)
+ value = unpack_param(si_shader_ctx, SI_PARAM_REL_IDS, 8, 5);
+ else if (si_shader_ctx->type == TGSI_PROCESSOR_GEOMETRY)
+ value = LLVMGetParam(radeon_bld->main_fn,
+ SI_PARAM_GS_INSTANCE_ID);
+ else
+ assert(!"INVOCATIONID not implemented");
+ break;
+
case TGSI_SEMANTIC_SAMPLEID:
value = get_sample_id(radeon_bld);
break;
case TGSI_SEMANTIC_SAMPLEPOS:
- {
- LLVMBuilderRef builder = gallivm->builder;
- LLVMValueRef desc = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
- LLVMValueRef buf_index = lp_build_const_int32(gallivm, SI_DRIVER_STATE_CONST_BUF);
- LLVMValueRef resource = build_indexed_load_const(si_shader_ctx, desc, buf_index);
-
- /* offset = sample_id * 8 (8 = 2 floats containing samplepos.xy) */
- LLVMValueRef offset0 = lp_build_mul_imm(uint_bld, get_sample_id(radeon_bld), 8);
- LLVMValueRef offset1 = LLVMBuildAdd(builder, offset0, lp_build_const_int32(gallivm, 4), "");
-
- LLVMValueRef pos[4] = {
- buffer_load_const(builder, resource, offset0, radeon_bld->soa.bld_base.base.elem_type),
- buffer_load_const(builder, resource, offset1, radeon_bld->soa.bld_base.base.elem_type),
- lp_build_const_float(gallivm, 0),
- lp_build_const_float(gallivm, 0)
- };
- value = lp_build_gather_values(gallivm, pos, 4);
+ value = load_sample_position(radeon_bld, get_sample_id(radeon_bld));
break;
- }
case TGSI_SEMANTIC_SAMPLEMASK:
/* Smoothing isn't MSAA in GL, but it's MSAA in hardware.
@@ -660,6 +1120,48 @@ static void declare_system_value(
value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_SAMPLE_COVERAGE);
break;
+ case TGSI_SEMANTIC_TESSCOORD:
+ {
+ LLVMValueRef coord[4] = {
+ LLVMGetParam(radeon_bld->main_fn, si_shader_ctx->param_tes_u),
+ LLVMGetParam(radeon_bld->main_fn, si_shader_ctx->param_tes_v),
+ bld->zero,
+ bld->zero
+ };
+
+ /* For triangles, the vector should be (u, v, 1-u-v). */
+ if (si_shader_ctx->shader->selector->info.properties[TGSI_PROPERTY_TES_PRIM_MODE] ==
+ PIPE_PRIM_TRIANGLES)
+ coord[2] = lp_build_sub(bld, bld->one,
+ lp_build_add(bld, coord[0], coord[1]));
+
+ value = lp_build_gather_values(gallivm, coord, 4);
+ break;
+ }
+
+ case TGSI_SEMANTIC_VERTICESIN:
+ value = unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_LAYOUT, 26, 6);
+ break;
+
+ case TGSI_SEMANTIC_TESSINNER:
+ case TGSI_SEMANTIC_TESSOUTER:
+ {
+ LLVMValueRef dw_addr;
+ int param = si_shader_io_get_unique_index(decl->Semantic.Name, 0);
+
+ dw_addr = get_tcs_out_current_patch_data_offset(si_shader_ctx);
+ dw_addr = LLVMBuildAdd(gallivm->builder, dw_addr,
+ lp_build_const_int32(gallivm, param * 4), "");
+
+ value = lds_load(&radeon_bld->soa.bld_base, TGSI_TYPE_FLOAT,
+ ~0, dw_addr);
+ break;
+ }
+
+ case TGSI_SEMANTIC_PRIMID:
+ value = get_primitive_id(&radeon_bld->soa.bld_base, 0);
+ break;
+
default:
assert(!"unknown system value");
return;
@@ -679,7 +1181,7 @@ static LLVMValueRef fetch_constant(
const struct tgsi_ind_register *ireg = &reg->Indirect;
unsigned buf, idx;
- LLVMValueRef addr;
+ LLVMValueRef addr, bufp;
LLVMValueRef result;
if (swizzle == LP_CHAN_ALL) {
@@ -694,8 +1196,24 @@ static LLVMValueRef fetch_constant(
buf = reg->Register.Dimension ? reg->Dimension.Index : 0;
idx = reg->Register.Index * 4 + swizzle;
- if (!reg->Register.Indirect)
- return bitcast(bld_base, type, si_shader_ctx->constants[buf][idx]);
+ if (!reg->Register.Indirect && !reg->Dimension.Indirect) {
+ if (type != TGSI_TYPE_DOUBLE)
+ return bitcast(bld_base, type, si_shader_ctx->constants[buf][idx]);
+ else {
+ return radeon_llvm_emit_fetch_double(bld_base,
+ si_shader_ctx->constants[buf][idx],
+ si_shader_ctx->constants[buf][idx + 1]);
+ }
+ }
+
+ if (reg->Register.Dimension && reg->Dimension.Indirect) {
+ LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
+ LLVMValueRef index;
+ index = get_indirect_index(si_shader_ctx, &reg->DimIndirect,
+ reg->Dimension.Index);
+ bufp = build_indexed_load_const(si_shader_ctx, ptr, index);
+ } else
+ bufp = si_shader_ctx->const_resource[buf];
addr = si_shader_ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle];
addr = LLVMBuildLoad(base->gallivm->builder, addr, "load addr reg");
@@ -703,10 +1221,26 @@ static LLVMValueRef fetch_constant(
addr = lp_build_add(&bld_base->uint_bld, addr,
lp_build_const_int32(base->gallivm, idx * 4));
- result = buffer_load_const(base->gallivm->builder, si_shader_ctx->const_resource[buf],
- addr, base->elem_type);
+ result = buffer_load_const(base->gallivm->builder, bufp,
+ addr, bld_base->base.elem_type);
+
+ if (type != TGSI_TYPE_DOUBLE)
+ result = bitcast(bld_base, type, result);
+ else {
+ LLVMValueRef addr2, result2;
+ addr2 = si_shader_ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle + 1];
+ addr2 = LLVMBuildLoad(base->gallivm->builder, addr2, "load addr reg2");
+ addr2 = lp_build_mul_imm(&bld_base->uint_bld, addr2, 16);
+ addr2 = lp_build_add(&bld_base->uint_bld, addr2,
+ lp_build_const_int32(base->gallivm, idx * 4));
+
+ result2 = buffer_load_const(base->gallivm->builder, si_shader_ctx->const_resource[buf],
+ addr2, bld_base->base.elem_type);
- return bitcast(bld_base, type, result);
+ result = radeon_llvm_emit_fetch_double(bld_base,
+ result, result2);
+ }
+ return result;
}
/* Initialize arguments for the shader export intrinsic */
@@ -745,7 +1279,7 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
args[0] = values[2 * chan];
args[1] = values[2 * chan + 1];
args[chan + 5] =
- build_intrinsic(base->gallivm->builder,
+ lp_build_intrinsic(base->gallivm->builder,
"llvm.SI.packf16",
LLVMInt32TypeInContext(base->gallivm->context),
args, 2,
@@ -827,12 +1361,12 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
lp_build_const_float(gallivm, 1.0f),
lp_build_const_float(gallivm, -1.0f));
- build_intrinsic(gallivm->builder,
+ lp_build_intrinsic(gallivm->builder,
"llvm.AMDGPU.kill",
LLVMVoidTypeInContext(gallivm->context),
&arg, 1, 0);
} else {
- build_intrinsic(gallivm->builder,
+ lp_build_intrinsic(gallivm->builder,
"llvm.AMDGPU.kilp",
LLVMVoidTypeInContext(gallivm->context),
NULL, 0, 0);
@@ -853,7 +1387,7 @@ static void si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base
SI_PARAM_SAMPLE_COVERAGE);
coverage = bitcast(bld_base, TGSI_TYPE_SIGNED, coverage);
- coverage = build_intrinsic(gallivm->builder, "llvm.ctpop.i32",
+ coverage = lp_build_intrinsic(gallivm->builder, "llvm.ctpop.i32",
bld_base->int_bld.elem_type,
&coverage, 1, LLVMReadNoneAttribute);
@@ -983,16 +1517,16 @@ static void build_tbuffer_store(struct si_shader_context *shader,
lp_build_intrinsic(gallivm->builder, name,
LLVMVoidTypeInContext(gallivm->context),
- args, Elements(args));
+ args, Elements(args), 0);
}
-static void build_streamout_store(struct si_shader_context *shader,
- LLVMValueRef rsrc,
- LLVMValueRef vdata,
- unsigned num_channels,
- LLVMValueRef vaddr,
- LLVMValueRef soffset,
- unsigned inst_offset)
+static void build_tbuffer_store_dwords(struct si_shader_context *shader,
+ LLVMValueRef rsrc,
+ LLVMValueRef vdata,
+ unsigned num_channels,
+ LLVMValueRef vaddr,
+ LLVMValueRef soffset,
+ unsigned inst_offset)
{
static unsigned dfmt[] = {
V_008F0C_BUF_DATA_FORMAT_32,
@@ -1025,13 +1559,16 @@ static void si_llvm_emit_streamout(struct si_shader_context *shader,
LLVMValueRef so_vtx_count =
unpack_param(shader, shader->param_streamout_config, 16, 7);
- LLVMValueRef tid = build_intrinsic(builder, "llvm.SI.tid", i32,
+ LLVMValueRef tid = lp_build_intrinsic(builder, "llvm.SI.tid", i32,
NULL, 0, LLVMReadNoneAttribute);
/* can_emit = tid < so_vtx_count; */
LLVMValueRef can_emit =
LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, "");
+ LLVMValueRef stream_id =
+ unpack_param(shader, shader->param_streamout_config, 24, 2);
+
/* Emit the streamout code conditionally. This actually avoids
* out-of-bounds buffer access. The hw tells us via the SGPR
* (so_vtx_count) which threads are allowed to emit streamout data. */
@@ -1071,7 +1608,9 @@ static void si_llvm_emit_streamout(struct si_shader_context *shader,
unsigned reg = so->output[i].register_index;
unsigned start = so->output[i].start_component;
unsigned num_comps = so->output[i].num_components;
+ unsigned stream = so->output[i].stream;
LLVMValueRef out[4];
+ struct lp_build_if_state if_ctx_stream;
assert(num_comps && num_comps <= 4);
if (!num_comps || num_comps > 4)
@@ -1105,11 +1644,18 @@ static void si_llvm_emit_streamout(struct si_shader_context *shader,
break;
}
- build_streamout_store(shader, shader->so_buffers[buf_idx],
- vdata, num_comps,
- so_write_offset[buf_idx],
- LLVMConstInt(i32, 0, 0),
- so->output[i].dst_offset*4);
+ LLVMValueRef can_emit_stream =
+ LLVMBuildICmp(builder, LLVMIntEQ,
+ stream_id,
+ lp_build_const_int32(gallivm, stream), "");
+
+ lp_build_if(&if_ctx_stream, gallivm, can_emit_stream);
+ build_tbuffer_store_dwords(shader, shader->so_buffers[buf_idx],
+ vdata, num_comps,
+ so_write_offset[buf_idx],
+ LLVMConstInt(i32, 0, 0),
+ so->output[i].dst_offset*4);
+ lp_build_endif(&if_ctx_stream);
}
}
lp_build_endif(&if_ctx);
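
The hunk above nests a second guard inside the existing thread mask, so a store only happens when both the thread may emit and the output's stream matches. A scalarized model of the control flow (the real code wraps buffer stores in LLVM if/endif blocks rather than C branches):

static unsigned streamout_model(unsigned tid, unsigned so_vtx_count,
                                unsigned stream_id,
                                const unsigned output_stream[], unsigned n)
{
        unsigned stores = 0;

        if (tid < so_vtx_count) {                       /* can_emit */
                for (unsigned i = 0; i < n; i++)
                        if (stream_id == output_stream[i]) /* can_emit_stream */
                                stores++;  /* a tbuffer store happens here */
        }
        return stores;
}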
@@ -1128,7 +1674,7 @@ static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
LLVMValueRef args[9];
LLVMValueRef pos_args[4][9] = { { 0 } };
- LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL;
+ LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL, viewport_index_value = NULL;
unsigned semantic_name, semantic_index;
unsigned target;
unsigned param_count = 0;
@@ -1154,7 +1700,12 @@ handle_semantic:
continue;
case TGSI_SEMANTIC_LAYER:
layer_value = outputs[i].values[0];
- continue;
+ semantic_name = TGSI_SEMANTIC_GENERIC;
+ goto handle_semantic;
+ case TGSI_SEMANTIC_VIEWPORT_INDEX:
+ viewport_index_value = outputs[i].values[0];
+ semantic_name = TGSI_SEMANTIC_GENERIC;
+ goto handle_semantic;
case TGSI_SEMANTIC_POSITION:
target = V_008DFC_SQ_EXP_POS;
break;
@@ -1195,7 +1746,7 @@ handle_semantic:
lp_build_intrinsic(base->gallivm->builder,
"llvm.SI.export",
LLVMVoidTypeInContext(base->gallivm->context),
- args, 9);
+ args, 9, 0);
}
if (semantic_name == TGSI_SEMANTIC_CLIPDIST) {
@@ -1204,6 +1755,8 @@ handle_semantic:
}
}
+ shader->nr_param_exports = param_count;
+
/* We need to add the position output manually if it's missing. */
if (!pos_args[0][0]) {
pos_args[0][0] = lp_build_const_int32(base->gallivm, 0xf); /* writemask */
@@ -1220,11 +1773,13 @@ handle_semantic:
/* Write the misc vector (point size, edgeflag, layer, viewport). */
if (shader->selector->info.writes_psize ||
shader->selector->info.writes_edgeflag ||
+ shader->selector->info.writes_viewport_index ||
shader->selector->info.writes_layer) {
pos_args[1][0] = lp_build_const_int32(base->gallivm, /* writemask */
shader->selector->info.writes_psize |
(shader->selector->info.writes_edgeflag << 1) |
- (shader->selector->info.writes_layer << 2));
+ (shader->selector->info.writes_layer << 2) |
+ (shader->selector->info.writes_viewport_index << 3));
pos_args[1][1] = uint->zero; /* EXEC mask */
pos_args[1][2] = uint->zero; /* last export? */
pos_args[1][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + 1);
@@ -1255,6 +1810,9 @@ handle_semantic:
if (shader->selector->info.writes_layer)
pos_args[1][7] = layer_value;
+
+ if (shader->selector->info.writes_viewport_index)
+ pos_args[1][8] = viewport_index_value;
}
for (i = 0; i < 4; i++)
@@ -1276,7 +1834,133 @@ handle_semantic:
lp_build_intrinsic(base->gallivm->builder,
"llvm.SI.export",
LLVMVoidTypeInContext(base->gallivm->context),
- pos_args[i], 9);
+ pos_args[i], 9, 0);
+ }
+}
+
+/* This only writes the tessellation factor levels. */
+static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
+{
+ struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ struct si_shader *shader = si_shader_ctx->shader;
+ unsigned tess_inner_index, tess_outer_index;
+ LLVMValueRef lds_base, lds_inner, lds_outer;
+ LLVMValueRef tf_base, rel_patch_id, byteoffset, buffer, rw_buffers;
+ LLVMValueRef out[6], vec0, vec1, invocation_id;
+ unsigned stride, outer_comps, inner_comps, i;
+ struct lp_build_if_state if_ctx;
+
+ invocation_id = unpack_param(si_shader_ctx, SI_PARAM_REL_IDS, 8, 5);
+
+ /* Do this only for invocation 0, because the tess levels are per-patch,
+ * not per-vertex.
+ *
+ * The branch can't be turned into a jump, because invocation 0
+ * always executes this code; the branch should at least mask out
+ * the loads and stores for the other invocations.
+ */
+ lp_build_if(&if_ctx, gallivm,
+ LLVMBuildICmp(gallivm->builder, LLVMIntEQ,
+ invocation_id, bld_base->uint_bld.zero, ""));
+
+ /* Determine the layout of one tess factor element in the buffer. */
+ switch (shader->key.tcs.prim_mode) {
+ case PIPE_PRIM_LINES:
+ stride = 2; /* 2 dwords, 1 vec2 store */
+ outer_comps = 2;
+ inner_comps = 0;
+ break;
+ case PIPE_PRIM_TRIANGLES:
+ stride = 4; /* 4 dwords, 1 vec4 store */
+ outer_comps = 3;
+ inner_comps = 1;
+ break;
+ case PIPE_PRIM_QUADS:
+ stride = 6; /* 6 dwords, 2 stores (vec4 + vec2) */
+ outer_comps = 4;
+ inner_comps = 2;
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ /* Load tess_inner and tess_outer from LDS.
+ * Any invocation can write them, so we can't get them from a temporary.
+ */
+ tess_inner_index = si_shader_io_get_unique_index(TGSI_SEMANTIC_TESSINNER, 0);
+ tess_outer_index = si_shader_io_get_unique_index(TGSI_SEMANTIC_TESSOUTER, 0);
+
+ lds_base = get_tcs_out_current_patch_data_offset(si_shader_ctx);
+ lds_inner = LLVMBuildAdd(gallivm->builder, lds_base,
+ lp_build_const_int32(gallivm,
+ tess_inner_index * 4), "");
+ lds_outer = LLVMBuildAdd(gallivm->builder, lds_base,
+ lp_build_const_int32(gallivm,
+ tess_outer_index * 4), "");
+
+ for (i = 0; i < outer_comps; i++)
+ out[i] = lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_outer);
+ for (i = 0; i < inner_comps; i++)
+ out[outer_comps+i] = lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_inner);
+
+ /* Convert the outputs to vectors for stores. */
+ vec0 = lp_build_gather_values(gallivm, out, MIN2(stride, 4));
+ vec1 = NULL;
+
+ if (stride > 4)
+ vec1 = lp_build_gather_values(gallivm, out+4, stride - 4);
+
+ /* Get the buffer. */
+ rw_buffers = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+ SI_PARAM_RW_BUFFERS);
+ buffer = build_indexed_load_const(si_shader_ctx, rw_buffers,
+ lp_build_const_int32(gallivm, SI_RING_TESS_FACTOR));
+
+ /* Get the offset. */
+ tf_base = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+ SI_PARAM_TESS_FACTOR_OFFSET);
+ rel_patch_id = get_rel_patch_id(si_shader_ctx);
+ byteoffset = LLVMBuildMul(gallivm->builder, rel_patch_id,
+ lp_build_const_int32(gallivm, 4 * stride), "");
+
+ /* Store the outputs. */
+ build_tbuffer_store_dwords(si_shader_ctx, buffer, vec0,
+ MIN2(stride, 4), byteoffset, tf_base, 0);
+ if (vec1)
+ build_tbuffer_store_dwords(si_shader_ctx, buffer, vec1,
+ stride - 4, byteoffset, tf_base, 16);
+ lp_build_endif(&if_ctx);
+}
+
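
For quad patches the switch above picks a stride of 6 dwords, which the stores then split into a vec4 (outer[0..3]) at element offset 0 and a vec2 (inner[0..1]) at offset 16. Illustrative byte arithmetic:

static void quad_tf_offsets(unsigned rel_patch_id,
                            unsigned *vec0_off, unsigned *vec1_off)
{
        unsigned stride = 6;                          /* dwords per patch */
        unsigned base   = rel_patch_id * 4 * stride;  /* byteoffset       */

        *vec0_off = base;         /* outer tess factors, vec4 */
        *vec1_off = base + 16;    /* inner tess factors, vec2 */
}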
+static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context * bld_base)
+{
+ struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+ struct si_shader *shader = si_shader_ctx->shader;
+ struct tgsi_shader_info *info = &shader->selector->info;
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ unsigned i, chan;
+ LLVMValueRef vertex_id = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+ si_shader_ctx->param_rel_auto_id);
+ LLVMValueRef vertex_dw_stride =
+ unpack_param(si_shader_ctx, SI_PARAM_LS_OUT_LAYOUT, 13, 8);
+ LLVMValueRef base_dw_addr = LLVMBuildMul(gallivm->builder, vertex_id,
+ vertex_dw_stride, "");
+
+ /* Write the outputs to LDS. The next shader (TCS, aka HS) will
+ * read its inputs from there. */
+ for (i = 0; i < info->num_outputs; i++) {
+ LLVMValueRef *out_ptr = si_shader_ctx->radeon_bld.soa.outputs[i];
+ unsigned name = info->output_semantic_name[i];
+ unsigned index = info->output_semantic_index[i];
+ int param = si_shader_io_get_unique_index(name, index);
+ LLVMValueRef dw_addr = LLVMBuildAdd(gallivm->builder, base_dw_addr,
+ lp_build_const_int32(gallivm, param * 4), "");
+
+ for (chan = 0; chan < 4; chan++) {
+ lds_store(bld_base, chan, dw_addr,
+ LLVMBuildLoad(gallivm->builder, out_ptr[chan], ""));
+ }
}
}
@@ -1288,17 +1972,25 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context * bld_base)
struct tgsi_shader_info *info = &es->selector->info;
LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
LLVMValueRef soffset = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
- SI_PARAM_ES2GS_OFFSET);
+ si_shader_ctx->param_es2gs_offset);
+ uint64_t enabled_outputs = si_shader_ctx->type == TGSI_PROCESSOR_TESS_EVAL ?
+ es->key.tes.es_enabled_outputs :
+ es->key.vs.es_enabled_outputs;
unsigned chan;
int i;
for (i = 0; i < info->num_outputs; i++) {
LLVMValueRef *out_ptr =
si_shader_ctx->radeon_bld.soa.outputs[i];
- int param_index = get_param_index(info->output_semantic_name[i],
- info->output_semantic_index[i],
- es->key.vs.gs_used_inputs);
+ int param_index;
+ if (info->output_semantic_name[i] == TGSI_SEMANTIC_VIEWPORT_INDEX ||
+ info->output_semantic_name[i] == TGSI_SEMANTIC_LAYER)
+ continue;
+
+ param_index = get_param_index(info->output_semantic_name[i],
+ info->output_semantic_index[i],
+ enabled_outputs);
if (param_index < 0)
continue;
@@ -1326,7 +2018,7 @@ static void si_llvm_emit_gs_epilogue(struct lp_build_tgsi_context *bld_base)
args[0] = lp_build_const_int32(gallivm, SENDMSG_GS_OP_NOP | SENDMSG_GS_DONE);
args[1] = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_GS_WAVE_ID);
- build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
+ lp_build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
LLVMVoidTypeInContext(gallivm->context), args, 2,
LLVMNoUnwindAttribute);
}
@@ -1339,7 +2031,7 @@ static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context * bld_base)
struct si_shader_output_values *outputs = NULL;
int i,j;
- outputs = MALLOC(info->num_outputs * sizeof(outputs[0]));
+ outputs = MALLOC((info->num_outputs + 1) * sizeof(outputs[0]));
for (i = 0; i < info->num_outputs; i++) {
outputs[i].name = info->output_semantic_name[i];
@@ -1352,7 +2044,19 @@ static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context * bld_base)
"");
}
- si_llvm_export_vs(bld_base, outputs, info->num_outputs);
+ /* Export PrimitiveID when PS needs it. */
+ if (si_vs_exports_prim_id(si_shader_ctx->shader)) {
+ outputs[i].name = TGSI_SEMANTIC_PRIMID;
+ outputs[i].sid = 0;
+ outputs[i].values[0] = bitcast(bld_base, TGSI_TYPE_FLOAT,
+ get_primitive_id(bld_base, 0));
+ outputs[i].values[1] = bld_base->base.undef;
+ outputs[i].values[2] = bld_base->base.undef;
+ outputs[i].values[3] = bld_base->base.undef;
+ i++;
+ }
+
+ si_llvm_export_vs(bld_base, outputs, i);
FREE(outputs);
}
@@ -1417,7 +2121,7 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base)
lp_build_intrinsic(base->gallivm->builder,
"llvm.SI.export",
LLVMVoidTypeInContext(base->gallivm->context),
- last_args, 9);
+ last_args, 9, 0);
}
/* This instruction will be emitted at the end of the shader. */
@@ -1434,14 +2138,14 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base)
lp_build_intrinsic(base->gallivm->builder,
"llvm.SI.export",
LLVMVoidTypeInContext(base->gallivm->context),
- args, 9);
+ args, 9, 0);
}
}
} else {
lp_build_intrinsic(base->gallivm->builder,
"llvm.SI.export",
LLVMVoidTypeInContext(base->gallivm->context),
- args, 9);
+ args, 9, 0);
}
}
@@ -1503,7 +2207,7 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base)
lp_build_intrinsic(base->gallivm->builder,
"llvm.SI.export",
LLVMVoidTypeInContext(base->gallivm->context),
- args, 9);
+ args, 9, 0);
else
memcpy(last_args, args, sizeof(args));
}
@@ -1534,7 +2238,7 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base)
lp_build_intrinsic(base->gallivm->builder,
"llvm.SI.export",
LLVMVoidTypeInContext(base->gallivm->context),
- last_args, 9);
+ last_args, 9, 0);
}
static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
@@ -1563,15 +2267,36 @@ static void tex_fetch_args(
const struct tgsi_full_instruction * inst = emit_data->inst;
unsigned opcode = inst->Instruction.Opcode;
unsigned target = inst->Texture.Texture;
- LLVMValueRef coords[5];
+ LLVMValueRef coords[5], derivs[6];
LLVMValueRef address[16];
int ref_pos;
unsigned num_coords = tgsi_util_get_texture_coord_dim(target, &ref_pos);
unsigned count = 0;
unsigned chan;
- unsigned sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1;
- unsigned sampler_index = emit_data->inst->Src[sampler_src].Register.Index;
+ unsigned sampler_src;
+ unsigned sampler_index;
+ unsigned num_deriv_channels = 0;
bool has_offset = HAVE_LLVM >= 0x0305 ? inst->Texture.NumOffsets > 0 : false;
+ LLVMValueRef res_ptr, samp_ptr;
+
+ sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1;
+ sampler_index = emit_data->inst->Src[sampler_src].Register.Index;
+
+ if (emit_data->inst->Src[sampler_src].Register.Indirect) {
+ const struct tgsi_full_src_register *reg = &emit_data->inst->Src[sampler_src];
+ LLVMValueRef ind_index;
+
+ ind_index = get_indirect_index(si_shader_ctx, &reg->Indirect, reg->Register.Index);
+
+ res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
+ res_ptr = build_indexed_load_const(si_shader_ctx, res_ptr, ind_index);
+
+ samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER);
+ samp_ptr = build_indexed_load_const(si_shader_ctx, samp_ptr, ind_index);
+ } else {
+ res_ptr = si_shader_ctx->resources[sampler_index];
+ samp_ptr = si_shader_ctx->samplers[sampler_index];
+ }
if (target == TGSI_TEXTURE_BUFFER) {
LLVMTypeRef i128 = LLVMIntTypeInContext(gallivm->context, 128);
@@ -1580,7 +2305,7 @@ static void tex_fetch_args(
LLVMTypeRef v16i8 = LLVMVectorType(i8, 16);
/* Bitcast and truncate v8i32 to v16i8. */
- LLVMValueRef res = si_shader_ctx->resources[sampler_index];
+ LLVMValueRef res = res_ptr;
res = LLVMBuildBitCast(gallivm->builder, res, v2i128, "");
res = LLVMBuildExtractElement(gallivm->builder, res, bld_base->uint_bld.one, "");
res = LLVMBuildBitCast(gallivm->builder, res, v16i8, "");
@@ -1649,18 +2374,13 @@ static void tex_fetch_args(
}
}
- if (target == TGSI_TEXTURE_CUBE ||
- target == TGSI_TEXTURE_CUBE_ARRAY ||
- target == TGSI_TEXTURE_SHADOWCUBE ||
- target == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
- radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);
-
/* Pack user derivatives */
if (opcode == TGSI_OPCODE_TXD) {
- int num_deriv_channels, param;
+ int param, num_src_deriv_channels;
switch (target) {
case TGSI_TEXTURE_3D:
+ num_src_deriv_channels = 3;
num_deriv_channels = 3;
break;
case TGSI_TEXTURE_2D:
@@ -1669,27 +2389,44 @@ static void tex_fetch_args(
case TGSI_TEXTURE_SHADOWRECT:
case TGSI_TEXTURE_2D_ARRAY:
case TGSI_TEXTURE_SHADOW2D_ARRAY:
+ num_src_deriv_channels = 2;
+ num_deriv_channels = 2;
+ break;
case TGSI_TEXTURE_CUBE:
case TGSI_TEXTURE_SHADOWCUBE:
case TGSI_TEXTURE_CUBE_ARRAY:
case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
+ /* Cube derivatives will be converted to 2D. */
+ num_src_deriv_channels = 3;
num_deriv_channels = 2;
break;
case TGSI_TEXTURE_1D:
case TGSI_TEXTURE_SHADOW1D:
case TGSI_TEXTURE_1D_ARRAY:
case TGSI_TEXTURE_SHADOW1D_ARRAY:
+ num_src_deriv_channels = 1;
num_deriv_channels = 1;
break;
default:
assert(0); /* no other targets are valid here */
}
- for (param = 1; param <= 2; param++)
- for (chan = 0; chan < num_deriv_channels; chan++)
- address[count++] = lp_build_emit_fetch(bld_base, inst, param, chan);
+ for (param = 0; param < 2; param++)
+ for (chan = 0; chan < num_src_deriv_channels; chan++)
+ derivs[param * num_src_deriv_channels + chan] =
+ lp_build_emit_fetch(bld_base, inst, param+1, chan);
}
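The resulting derivative layout is easier to see in isolation. A standalone C sketch of the packing loop above (illustrative values and names only, not driver code):

#include <stdio.h>

int main(void)
{
	/* Two derivative source registers: param 1 = ddx, param 2 = ddy. */
	float src[2][3] = { { 0.1f, 0.2f, 0.3f },   /* ddx.xyz */
	                    { 0.4f, 0.5f, 0.6f } }; /* ddy.xyz */
	float derivs[6];
	int num_src_deriv_channels = 3; /* e.g. a cube target fetches 3 */
	int param, chan;

	/* Same packing as above: all ddx channels first, then all ddy. */
	for (param = 0; param < 2; param++)
		for (chan = 0; chan < num_src_deriv_channels; chan++)
			derivs[param * num_src_deriv_channels + chan] = src[param][chan];

	for (param = 0; param < 6; param++)
		printf("derivs[%d] = %.1f\n", param, derivs[param]);
	return 0;
}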
+ if (target == TGSI_TEXTURE_CUBE ||
+ target == TGSI_TEXTURE_CUBE_ARRAY ||
+ target == TGSI_TEXTURE_SHADOWCUBE ||
+ target == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
+ radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords, derivs);
+
+ if (opcode == TGSI_OPCODE_TXD)
+ for (int i = 0; i < num_deriv_channels * 2; i++)
+ address[count++] = derivs[i];
+
/* Pack texture coordinates */
address[count++] = coords[0];
if (num_coords > 1)
@@ -1806,7 +2543,7 @@ static void tex_fetch_args(
}
/* Resource */
- emit_data->args[1] = si_shader_ctx->resources[sampler_index];
+ emit_data->args[1] = res_ptr;
if (opcode == TGSI_OPCODE_TXF) {
/* add tex offsets */
@@ -1889,7 +2626,7 @@ static void tex_fetch_args(
dmask = 1 << gather_comp;
}
- emit_data->args[2] = si_shader_ctx->samplers[sampler_index];
+ emit_data->args[2] = samp_ptr;
emit_data->args[3] = lp_build_const_int32(gallivm, dmask);
emit_data->args[4] = lp_build_const_int32(gallivm, is_rect); /* unorm */
emit_data->args[5] = lp_build_const_int32(gallivm, 0); /* r128 */
@@ -1905,7 +2642,7 @@ static void tex_fetch_args(
LLVMFloatTypeInContext(gallivm->context),
4);
} else {
- emit_data->args[2] = si_shader_ctx->samplers[sampler_index];
+ emit_data->args[2] = samp_ptr;
emit_data->args[3] = lp_build_const_int32(gallivm, target);
emit_data->arg_count = 4;
@@ -1940,7 +2677,7 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
emit_data->inst->Texture.NumOffsets > 0 : false;
if (target == TGSI_TEXTURE_BUFFER) {
- emit_data->output[emit_data->chan] = build_intrinsic(
+ emit_data->output[emit_data->chan] = lp_build_intrinsic(
base->gallivm->builder,
"llvm.SI.vs.load.input", emit_data->dst_type,
emit_data->args, emit_data->arg_count,
@@ -1989,7 +2726,7 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
is_shadow ? ".c" : "", infix, has_offset ? ".o" : "",
LLVMGetVectorSize(LLVMTypeOf(emit_data->args[0])));
- emit_data->output[emit_data->chan] = build_intrinsic(
+ emit_data->output[emit_data->chan] = lp_build_intrinsic(
base->gallivm->builder, intr_name, emit_data->dst_type,
emit_data->args, emit_data->arg_count,
LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
@@ -2036,7 +2773,7 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
sprintf(intr_name, "%s.v%ui32", name,
LLVMGetVectorSize(LLVMTypeOf(emit_data->args[0])));
- emit_data->output[emit_data->chan] = build_intrinsic(
+ emit_data->output[emit_data->chan] = lp_build_intrinsic(
base->gallivm->builder, intr_name, emit_data->dst_type,
emit_data->args, emit_data->arg_count,
LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
@@ -2050,17 +2787,47 @@ static void txq_fetch_args(
struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
const struct tgsi_full_instruction *inst = emit_data->inst;
struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
unsigned target = inst->Texture.Texture;
+ LLVMValueRef res_ptr;
+
+ if (inst->Src[1].Register.Indirect) {
+ const struct tgsi_full_src_register *reg = &inst->Src[1];
+ LLVMValueRef ind_index;
+
+ ind_index = get_indirect_index(si_shader_ctx, &reg->Indirect, reg->Register.Index);
+
+ res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
+ res_ptr = build_indexed_load_const(si_shader_ctx, res_ptr,
+ ind_index);
+ } else
+ res_ptr = si_shader_ctx->resources[inst->Src[1].Register.Index];
if (target == TGSI_TEXTURE_BUFFER) {
LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
LLVMTypeRef v8i32 = LLVMVectorType(i32, 8);
/* Read the size from the buffer descriptor directly. */
- LLVMValueRef size = si_shader_ctx->resources[inst->Src[1].Register.Index];
- size = LLVMBuildBitCast(gallivm->builder, size, v8i32, "");
- size = LLVMBuildExtractElement(gallivm->builder, size,
- lp_build_const_int32(gallivm, 6), "");
+ LLVMValueRef res = LLVMBuildBitCast(builder, res_ptr, v8i32, "");
+ LLVMValueRef size = LLVMBuildExtractElement(builder, res,
+ lp_build_const_int32(gallivm, 6), "");
+
+ if (si_shader_ctx->screen->b.chip_class >= VI) {
+ /* On VI, the descriptor contains the size in bytes,
+ * but TXQ must return the size in elements.
+ * The stride is always non-zero for resources using TXQ.
+ */
+ LLVMValueRef stride =
+ LLVMBuildExtractElement(builder, res,
+ lp_build_const_int32(gallivm, 5), "");
+ stride = LLVMBuildLShr(builder, stride,
+ lp_build_const_int32(gallivm, 16), "");
+ stride = LLVMBuildAnd(builder, stride,
+ lp_build_const_int32(gallivm, 0x3FFF), "");
+
+ size = LLVMBuildUDiv(builder, size, stride, "");
+ }
+
emit_data->args[0] = size;
return;
}
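A standalone C sketch of this size fixup, done on the CPU for clarity and assuming only what the shifts and masks above imply about the descriptor (stride in bits [16:29] of element 5, byte size in element 6):

#include <stdint.h>
#include <stdio.h>

static uint32_t vi_buffer_num_elements(const uint32_t desc[8])
{
	uint32_t size_bytes = desc[6];              /* element 6: size */
	uint32_t stride = (desc[5] >> 16) & 0x3FFF; /* element 5, bits [16:29] */
	return size_bytes / stride; /* stride is non-zero for TXQ resources */
}

int main(void)
{
	uint32_t desc[8] = {0};
	desc[5] = 16u << 16; /* stride: 16 bytes, e.g. an RGBA32F texel */
	desc[6] = 1024;      /* size in bytes */
	printf("%u elements\n", vi_buffer_num_elements(desc)); /* prints 64 */
	return 0;
}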
@@ -2069,7 +2836,7 @@ static void txq_fetch_args(
emit_data->args[0] = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X);
/* Resource */
- emit_data->args[1] = si_shader_ctx->resources[inst->Src[1].Register.Index];
+ emit_data->args[1] = res_ptr;
/* Texture target */
if (target == TGSI_TEXTURE_CUBE_ARRAY ||
@@ -2116,6 +2883,35 @@ static void build_txq_intrinsic(const struct lp_build_tgsi_action * action,
}
}
+/*
+ * SI implements derivatives using the local data store (LDS).
+ * All writes to the LDS happen in all executing threads at
+ * the same time. TID is the Thread ID for the current
+ * thread and is a value between 0 and 63, representing
+ * the thread's position in the wavefront.
+ *
+ * For the pixel shader, threads are grouped into quads of four pixels.
+ * The TIDs of the pixels of a quad are:
+ *
+ * +------+------+
+ * |4n + 0|4n + 1|
+ * +------+------+
+ * |4n + 2|4n + 3|
+ * +------+------+
+ *
+ * So, masking the TID with 0xfffffffc yields the TID of the top left pixel
+ * of the quad, masking with 0xfffffffd yields the TID of the top pixel of
+ * the current pixel's column, and masking with 0xfffffffe yields the TID
+ * of the left pixel of the current pixel's row.
+ *
+ * Adding 1 yields the TID of the pixel to the right of the left pixel, and
+ * adding 2 yields the TID of the pixel below the top pixel.
+ */
+/* masks for thread ID. */
+#define TID_MASK_TOP_LEFT 0xfffffffc
+#define TID_MASK_TOP 0xfffffffd
+#define TID_MASK_LEFT 0xfffffffe
+
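A worked example of these masks in plain C (illustrative only, not driver code):

#include <stdio.h>

#define TID_MASK_TOP_LEFT 0xfffffffc
#define TID_MASK_TOP      0xfffffffd
#define TID_MASK_LEFT     0xfffffffe

int main(void)
{
	unsigned tid = 7; /* quad n=1, bottom-right pixel */
	unsigned top_left = tid & TID_MASK_TOP_LEFT; /* 4 */
	unsigned top      = tid & TID_MASK_TOP;      /* 5 */
	unsigned left     = tid & TID_MASK_LEFT;     /* 6 */

	/* DDX reads (left, left + 1); DDY reads (top, top + 2). */
	printf("tid=%u top_left=%u top=%u left=%u\n", tid, top_left, top, left);
	printf("ddx neighbors: %u,%u  ddy neighbors: %u,%u\n",
	       left, left + 1, top, top + 2);
	return 0;
}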
static void si_llvm_emit_ddxy(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
@@ -2132,25 +2928,34 @@ static void si_llvm_emit_ddxy(
LLVMTypeRef i32;
unsigned swizzle[4];
unsigned c;
+ int idx;
+ unsigned mask;
i32 = LLVMInt32TypeInContext(gallivm->context);
indices[0] = bld_base->uint_bld.zero;
- indices[1] = build_intrinsic(gallivm->builder, "llvm.SI.tid", i32,
+ indices[1] = lp_build_intrinsic(gallivm->builder, "llvm.SI.tid", i32,
NULL, 0, LLVMReadNoneAttribute);
- store_ptr = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
+ store_ptr = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
indices, 2, "");
+ if (opcode == TGSI_OPCODE_DDX_FINE)
+ mask = TID_MASK_LEFT;
+ else if (opcode == TGSI_OPCODE_DDY_FINE)
+ mask = TID_MASK_TOP;
+ else
+ mask = TID_MASK_TOP_LEFT;
+
indices[1] = LLVMBuildAnd(gallivm->builder, indices[1],
- lp_build_const_int32(gallivm, 0xfffffffc), "");
- load_ptr0 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
+ lp_build_const_int32(gallivm, mask), "");
+ load_ptr0 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
indices, 2, "");
+ /* For DDX we want the next X pixel, for DDY the next Y pixel. */
+ idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? 1 : 2;
indices[1] = LLVMBuildAdd(gallivm->builder, indices[1],
- lp_build_const_int32(gallivm,
- opcode == TGSI_OPCODE_DDX ? 1 : 2),
- "");
- load_ptr1 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
+ lp_build_const_int32(gallivm, idx), "");
+ load_ptr1 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
indices, 2, "");
for (c = 0; c < 4; ++c) {
@@ -2184,6 +2989,247 @@ static void si_llvm_emit_ddxy(
emit_data->output[0] = lp_build_gather_values(gallivm, result, 4);
}
+/*
+ * This takes an I,J coordinate pair and works out the X and Y
+ * derivatives. It returns DDX(I), DDX(J), DDY(I), DDY(J).
+ */
+static LLVMValueRef si_llvm_emit_ddxy_interp(
+ struct lp_build_tgsi_context *bld_base,
+ LLVMValueRef interp_ij)
+{
+ struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ struct lp_build_context *base = &bld_base->base;
+ LLVMValueRef indices[2];
+ LLVMValueRef store_ptr, load_ptr_x, load_ptr_y, load_ptr_ddx, load_ptr_ddy, temp, temp2;
+ LLVMValueRef tl, tr, bl, result[4];
+ LLVMTypeRef i32;
+ unsigned c;
+
+ i32 = LLVMInt32TypeInContext(gallivm->context);
+
+ indices[0] = bld_base->uint_bld.zero;
+ indices[1] = lp_build_intrinsic(gallivm->builder, "llvm.SI.tid", i32,
+ NULL, 0, LLVMReadNoneAttribute);
+ store_ptr = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
+ indices, 2, "");
+
+ temp = LLVMBuildAnd(gallivm->builder, indices[1],
+ lp_build_const_int32(gallivm, TID_MASK_LEFT), "");
+
+ temp2 = LLVMBuildAnd(gallivm->builder, indices[1],
+ lp_build_const_int32(gallivm, TID_MASK_TOP), "");
+
+ indices[1] = temp;
+ load_ptr_x = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
+ indices, 2, "");
+
+ indices[1] = temp2;
+ load_ptr_y = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
+ indices, 2, "");
+
+ indices[1] = LLVMBuildAdd(gallivm->builder, temp,
+ lp_build_const_int32(gallivm, 1), "");
+ load_ptr_ddx = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
+ indices, 2, "");
+
+ indices[1] = LLVMBuildAdd(gallivm->builder, temp2,
+ lp_build_const_int32(gallivm, 2), "");
+ load_ptr_ddy = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
+ indices, 2, "");
+
+ for (c = 0; c < 2; ++c) {
+ LLVMValueRef store_val;
+ LLVMValueRef c_ll = lp_build_const_int32(gallivm, c);
+
+ store_val = LLVMBuildExtractElement(gallivm->builder,
+ interp_ij, c_ll, "");
+ LLVMBuildStore(gallivm->builder,
+ store_val,
+ store_ptr);
+
+ tl = LLVMBuildLoad(gallivm->builder, load_ptr_x, "");
+ tl = LLVMBuildBitCast(gallivm->builder, tl, base->elem_type, "");
+
+ tr = LLVMBuildLoad(gallivm->builder, load_ptr_ddx, "");
+ tr = LLVMBuildBitCast(gallivm->builder, tr, base->elem_type, "");
+
+ result[c] = LLVMBuildFSub(gallivm->builder, tr, tl, "");
+
+ tl = LLVMBuildLoad(gallivm->builder, load_ptr_y, "");
+ tl = LLVMBuildBitCast(gallivm->builder, tl, base->elem_type, "");
+
+ bl = LLVMBuildLoad(gallivm->builder, load_ptr_ddy, "");
+ bl = LLVMBuildBitCast(gallivm->builder, bl, base->elem_type, "");
+
+ result[c + 2] = LLVMBuildFSub(gallivm->builder, bl, tl, "");
+ }
+
+ return lp_build_gather_values(gallivm, result, 4);
+}
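The LDS exchange above reduces to neighbor differences within a quad. A CPU-side sketch, with a 4-entry array standing in for the quad's LDS slots (illustrative only):

#include <stdio.h>

int main(void)
{
	/* lds[tid] = {I, J} for one quad, tids 0..3 laid out as in the
	 * diagram above. */
	float lds[4][2] = { {0.10f, 0.20f}, {0.15f, 0.21f},
	                    {0.11f, 0.30f}, {0.16f, 0.31f} };
	unsigned tid = 3;               /* bottom-right pixel */
	unsigned x0 = tid & 0xfffffffe; /* left pixel of the row */
	unsigned y0 = tid & 0xfffffffd; /* top pixel of the column */
	float result[4];

	for (int c = 0; c < 2; c++) {
		result[c]     = lds[x0 + 1][c] - lds[x0][c]; /* DDX(I), DDX(J) */
		result[c + 2] = lds[y0 + 2][c] - lds[y0][c]; /* DDY(I), DDY(J) */
	}
	printf("DDX(I)=%.2f DDX(J)=%.2f DDY(I)=%.2f DDY(J)=%.2f\n",
	       result[0], result[1], result[2], result[3]);
	return 0;
}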
+
+static void interp_fetch_args(
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ const struct tgsi_full_instruction *inst = emit_data->inst;
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) {
+ /* offset is in second src, first two channels */
+ emit_data->args[0] = lp_build_emit_fetch(bld_base,
+ emit_data->inst, 1,
+ 0);
+ emit_data->args[1] = lp_build_emit_fetch(bld_base,
+ emit_data->inst, 1,
+ 1);
+ emit_data->arg_count = 2;
+ } else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
+ LLVMValueRef sample_position;
+ LLVMValueRef sample_id;
+ LLVMValueRef halfval = lp_build_const_float(gallivm, 0.5f);
+
+ /* fetch sample ID, then fetch its sample position,
+ * and place into first two channels.
+ */
+ sample_id = lp_build_emit_fetch(bld_base,
+ emit_data->inst, 1, 0);
+ sample_id = LLVMBuildBitCast(gallivm->builder, sample_id,
+ LLVMInt32TypeInContext(gallivm->context),
+ "");
+ sample_position = load_sample_position(&si_shader_ctx->radeon_bld, sample_id);
+
+ emit_data->args[0] = LLVMBuildExtractElement(gallivm->builder,
+ sample_position,
+ lp_build_const_int32(gallivm, 0), "");
+
+ emit_data->args[0] = LLVMBuildFSub(gallivm->builder, emit_data->args[0], halfval, "");
+ emit_data->args[1] = LLVMBuildExtractElement(gallivm->builder,
+ sample_position,
+ lp_build_const_int32(gallivm, 1), "");
+ emit_data->args[1] = LLVMBuildFSub(gallivm->builder, emit_data->args[1], halfval, "");
+ emit_data->arg_count = 2;
+ }
+}
+
+static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+ struct si_shader *shader = si_shader_ctx->shader;
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMValueRef interp_param;
+ const struct tgsi_full_instruction *inst = emit_data->inst;
+ const char *intr_name;
+ int input_index;
+ int chan;
+ int i;
+ LLVMValueRef attr_number;
+ LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context);
+ LLVMValueRef params = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_PRIM_MASK);
+ int interp_param_idx;
+ unsigned location;
+
+ assert(inst->Src[0].Register.File == TGSI_FILE_INPUT);
+ input_index = inst->Src[0].Register.Index;
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
+ inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE)
+ location = TGSI_INTERPOLATE_LOC_CENTER;
+ else
+ location = TGSI_INTERPOLATE_LOC_CENTROID;
+
+ interp_param_idx = lookup_interp_param_index(shader->ps_input_interpolate[input_index],
+ location);
+ if (interp_param_idx == -1)
+ return;
+ else if (interp_param_idx)
+ interp_param = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, interp_param_idx);
+ else
+ interp_param = NULL;
+
+ attr_number = lp_build_const_int32(gallivm,
+ shader->ps_input_param_offset[input_index]);
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
+ inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
+ LLVMValueRef ij_out[2];
+ LLVMValueRef ddxy_out = si_llvm_emit_ddxy_interp(bld_base, interp_param);
+
+ /*
+ * Take the I then J parameters, and the DDX/Y for each, and
+ * calculate the IJ inputs for the interpolator:
+ * temp1 = ddx * offset/sample.x + I;
+ * interp_param.I = ddy * offset/sample.y + temp1;
+ * temp1 = ddx * offset/sample.x + J;
+ * interp_param.J = ddy * offset/sample.y + temp1;
+ */
+ for (i = 0; i < 2; i++) {
+ LLVMValueRef ix_ll = lp_build_const_int32(gallivm, i);
+ LLVMValueRef iy_ll = lp_build_const_int32(gallivm, i + 2);
+ LLVMValueRef ddx_el = LLVMBuildExtractElement(gallivm->builder,
+ ddxy_out, ix_ll, "");
+ LLVMValueRef ddy_el = LLVMBuildExtractElement(gallivm->builder,
+ ddxy_out, iy_ll, "");
+ LLVMValueRef interp_el = LLVMBuildExtractElement(gallivm->builder,
+ interp_param, ix_ll, "");
+ LLVMValueRef temp1, temp2;
+
+ interp_el = LLVMBuildBitCast(gallivm->builder, interp_el,
+ LLVMFloatTypeInContext(gallivm->context), "");
+
+ temp1 = LLVMBuildFMul(gallivm->builder, ddx_el, emit_data->args[0], "");
+
+ temp1 = LLVMBuildFAdd(gallivm->builder, temp1, interp_el, "");
+
+ temp2 = LLVMBuildFMul(gallivm->builder, ddy_el, emit_data->args[1], "");
+
+ temp2 = LLVMBuildFAdd(gallivm->builder, temp2, temp1, "");
+
+ ij_out[i] = LLVMBuildBitCast(gallivm->builder,
+ temp2,
+ LLVMIntTypeInContext(gallivm->context, 32), "");
+ }
+ interp_param = lp_build_gather_values(bld_base->base.gallivm, ij_out, 2);
+ }
+
+ intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
+ for (chan = 0; chan < 2; chan++) {
+ LLVMValueRef args[4];
+ LLVMValueRef llvm_chan;
+ unsigned schan;
+
+ schan = tgsi_util_get_full_src_register_swizzle(&inst->Src[0], chan);
+ llvm_chan = lp_build_const_int32(gallivm, schan);
+
+ args[0] = llvm_chan;
+ args[1] = attr_number;
+ args[2] = params;
+ args[3] = interp_param;
+
+ emit_data->output[chan] =
+ lp_build_intrinsic(gallivm->builder, intr_name,
+ input_type, args, args[3] ? 4 : 3,
+ LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
+ }
+}
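Per component, the adjustment above is two fused multiply-adds: v' = v + ddx(v)*offset.x + ddy(v)*offset.y. A scalar C sketch with hypothetical values:

#include <stdio.h>

static float adjust(float v, float ddx, float ddy, float dx, float dy)
{
	float temp1 = ddx * dx + v; /* temp1 = ddx * offset.x + v */
	return ddy * dy + temp1;    /* v'    = ddy * offset.y + temp1 */
}

int main(void)
{
	float i = 0.25f, j = 0.50f;         /* barycentrics at pixel center */
	float ddx_i = 0.01f, ddy_i = 0.02f; /* screen-space derivatives of I */
	float ddx_j = 0.03f, ddy_j = 0.04f; /* screen-space derivatives of J */
	float dx = 0.5f, dy = -0.5f;        /* e.g. an INTERP_OFFSET operand */

	printf("i'=%f j'=%f\n",
	       adjust(i, ddx_i, ddy_i, dx, dy),
	       adjust(j, ddx_j, ddy_j, dx, dy));
	return 0;
}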
+
+static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ LLVMValueRef (*imms)[4] = lp_soa_context(bld_base)->immediates;
+ struct tgsi_src_register src0 = emit_data->inst->Src[0].Register;
+ unsigned stream;
+
+ assert(src0.File == TGSI_FILE_IMMEDIATE);
+
+ stream = LLVMConstIntGetZExtValue(imms[src0.Index][src0.SwizzleX]) & 0x3;
+ return stream;
+}
+
/* Emit one vertex from the geometry shader */
static void si_llvm_emit_vertex(
const struct lp_build_tgsi_action *action,
@@ -2203,9 +3249,14 @@ static void si_llvm_emit_vertex(
LLVMValueRef args[2];
unsigned chan;
int i;
+ unsigned stream;
+
+ stream = si_llvm_get_stream(bld_base, emit_data);
/* Write vertex attribute values to GSVS ring */
- gs_next_vertex = LLVMBuildLoad(gallivm->builder, si_shader_ctx->gs_next_vertex, "");
+ gs_next_vertex = LLVMBuildLoad(gallivm->builder,
+ si_shader_ctx->gs_next_vertex[stream],
+ "");
/* If this thread has already emitted the declared maximum number of
* vertices, kill it: excessive vertex emissions are not supposed to
@@ -2218,8 +3269,9 @@ static void si_llvm_emit_vertex(
kill = lp_build_select(&bld_base->base, can_emit,
lp_build_const_float(gallivm, 1.0f),
lp_build_const_float(gallivm, -1.0f));
- build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
- LLVMVoidTypeInContext(gallivm->context), &kill, 1, 0);
+
+ lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
+ LLVMVoidTypeInContext(gallivm->context), &kill, 1, 0);
for (i = 0; i < info->num_outputs; i++) {
LLVMValueRef *out_ptr =
@@ -2237,7 +3289,7 @@ static void si_llvm_emit_vertex(
out_val = LLVMBuildBitCast(gallivm->builder, out_val, i32, "");
build_tbuffer_store(si_shader_ctx,
- si_shader_ctx->gsvs_ring,
+ si_shader_ctx->gsvs_ring[stream],
out_val, 1,
voffset, soffset, 0,
V_008F0C_BUF_DATA_FORMAT_32,
@@ -2247,12 +3299,13 @@ static void si_llvm_emit_vertex(
}
gs_next_vertex = lp_build_add(uint, gs_next_vertex,
lp_build_const_int32(gallivm, 1));
- LLVMBuildStore(gallivm->builder, gs_next_vertex, si_shader_ctx->gs_next_vertex);
+
+ LLVMBuildStore(gallivm->builder, gs_next_vertex, si_shader_ctx->gs_next_vertex[stream]);
/* Signal vertex emission */
- args[0] = lp_build_const_int32(gallivm, SENDMSG_GS_OP_EMIT | SENDMSG_GS);
+ args[0] = lp_build_const_int32(gallivm, SENDMSG_GS_OP_EMIT | SENDMSG_GS | (stream << 8));
args[1] = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_GS_WAVE_ID);
- build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
+ lp_build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
LLVMVoidTypeInContext(gallivm->context), args, 2,
LLVMNoUnwindAttribute);
}
@@ -2266,15 +3319,28 @@ static void si_llvm_emit_primitive(
struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMValueRef args[2];
+ unsigned stream;
/* Signal primitive cut */
- args[0] = lp_build_const_int32(gallivm, SENDMSG_GS_OP_CUT | SENDMSG_GS);
+ stream = si_llvm_get_stream(bld_base, emit_data);
+ args[0] = lp_build_const_int32(gallivm, SENDMSG_GS_OP_CUT | SENDMSG_GS | (stream << 8));
args[1] = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_GS_WAVE_ID);
- build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
+ lp_build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
LLVMVoidTypeInContext(gallivm->context), args, 2,
LLVMNoUnwindAttribute);
}
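A sketch of how the s_sendmsg immediate is assembled in the emit/cut paths above. The field positions follow the code (stream goes into bits [9:8] via "stream << 8"); the SENDMSG_* values are assumptions mirroring what sid.h is expected to define (message type in bits [3:0], GS op in [5:4]):

#include <stdio.h>

#define SENDMSG_GS         2        /* message type, bits [3:0] */
#define SENDMSG_GS_DONE    3
#define SENDMSG_GS_OP_NOP  (0 << 4) /* GS op, bits [5:4] */
#define SENDMSG_GS_OP_CUT  (1 << 4)
#define SENDMSG_GS_OP_EMIT (2 << 4)

int main(void)
{
	unsigned stream = 2; /* vertex stream 0..3, bits [9:8] */
	unsigned emit = SENDMSG_GS_OP_EMIT | SENDMSG_GS | (stream << 8);
	unsigned cut  = SENDMSG_GS_OP_CUT  | SENDMSG_GS | (stream << 8);
	unsigned done = SENDMSG_GS_OP_NOP  | SENDMSG_GS_DONE;

	printf("emit=0x%03x cut=0x%03x done=0x%03x\n", emit, cut, done);
	return 0;
}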
+static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+
+ lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.barrier.local",
+ LLVMVoidTypeInContext(gallivm->context), NULL, 0,
+ LLVMNoUnwindAttribute);
+}
+
static const struct lp_build_tgsi_action tex_action = {
.fetch_args = tex_fetch_args,
.emit = build_tex_intrinsic,
@@ -2286,6 +3352,11 @@ static const struct lp_build_tgsi_action txq_action = {
.intr_name = "llvm.SI.resinfo"
};
+static const struct lp_build_tgsi_action interp_action = {
+ .fetch_args = interp_fetch_args,
+ .emit = build_interp_intrinsic,
+};
+
static void create_meta_data(struct si_shader_context *si_shader_ctx)
{
struct gallivm_state *gallivm = si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm;
@@ -2304,6 +3375,27 @@ static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
CONST_ADDR_SPACE);
}
+static void declare_streamout_params(struct si_shader_context *si_shader_ctx,
+ struct pipe_stream_output_info *so,
+ LLVMTypeRef *params, LLVMTypeRef i32,
+ unsigned *num_params)
+{
+ int i;
+
+ /* Streamout SGPRs. */
+ if (so->num_outputs) {
+ params[si_shader_ctx->param_streamout_config = (*num_params)++] = i32;
+ params[si_shader_ctx->param_streamout_write_index = (*num_params)++] = i32;
+ }
+ /* A streamout buffer offset is loaded if the stride is non-zero. */
+ for (i = 0; i < 4; i++) {
+ if (!so->stride[i])
+ continue;
+
+ params[si_shader_ctx->param_streamout_offset[i] = (*num_params)++] = i32;
+ }
+}
+
static void create_function(struct si_shader_context *si_shader_ctx)
{
struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
@@ -2336,8 +3428,10 @@ static void create_function(struct si_shader_context *si_shader_ctx)
num_params = SI_PARAM_START_INSTANCE+1;
if (shader->key.vs.as_es) {
- params[SI_PARAM_ES2GS_OFFSET] = i32;
- num_params++;
+ params[si_shader_ctx->param_es2gs_offset = num_params++] = i32;
+ } else if (shader->key.vs.as_ls) {
+ params[SI_PARAM_LS_OUT_LAYOUT] = i32;
+ num_params = SI_PARAM_LS_OUT_LAYOUT+1;
} else {
if (shader->is_gs_copy_shader) {
last_array_pointer = SI_PARAM_CONST;
@@ -2345,30 +3439,52 @@ static void create_function(struct si_shader_context *si_shader_ctx)
}
/* The locations of the other parameters are assigned dynamically. */
-
- /* Streamout SGPRs. */
- if (shader->selector->so.num_outputs) {
- params[si_shader_ctx->param_streamout_config = num_params++] = i32;
- params[si_shader_ctx->param_streamout_write_index = num_params++] = i32;
- }
- /* A streamout buffer offset is loaded if the stride is non-zero. */
- for (i = 0; i < 4; i++) {
- if (!shader->selector->so.stride[i])
- continue;
-
- params[si_shader_ctx->param_streamout_offset[i] = num_params++] = i32;
- }
+ declare_streamout_params(si_shader_ctx, &shader->selector->so,
+ params, i32, &num_params);
}
last_sgpr = num_params-1;
/* VGPRs */
params[si_shader_ctx->param_vertex_id = num_params++] = i32;
- params[num_params++] = i32; /* unused*/
- params[num_params++] = i32; /* unused */
+ params[si_shader_ctx->param_rel_auto_id = num_params++] = i32;
+ params[si_shader_ctx->param_vs_prim_id = num_params++] = i32;
params[si_shader_ctx->param_instance_id = num_params++] = i32;
break;
+ case TGSI_PROCESSOR_TESS_CTRL:
+ params[SI_PARAM_TCS_OUT_OFFSETS] = i32;
+ params[SI_PARAM_TCS_OUT_LAYOUT] = i32;
+ params[SI_PARAM_TCS_IN_LAYOUT] = i32;
+ params[SI_PARAM_TESS_FACTOR_OFFSET] = i32;
+ last_sgpr = SI_PARAM_TESS_FACTOR_OFFSET;
+
+ /* VGPRs */
+ params[SI_PARAM_PATCH_ID] = i32;
+ params[SI_PARAM_REL_IDS] = i32;
+ num_params = SI_PARAM_REL_IDS+1;
+ break;
+
+ case TGSI_PROCESSOR_TESS_EVAL:
+ params[SI_PARAM_TCS_OUT_OFFSETS] = i32;
+ params[SI_PARAM_TCS_OUT_LAYOUT] = i32;
+ num_params = SI_PARAM_TCS_OUT_LAYOUT+1;
+
+ if (shader->key.tes.as_es) {
+ params[si_shader_ctx->param_es2gs_offset = num_params++] = i32;
+ } else {
+ declare_streamout_params(si_shader_ctx, &shader->selector->so,
+ params, i32, &num_params);
+ }
+ last_sgpr = num_params - 1;
+
+ /* VGPRs */
+ params[si_shader_ctx->param_tes_u = num_params++] = f32;
+ params[si_shader_ctx->param_tes_v = num_params++] = f32;
+ params[si_shader_ctx->param_tes_rel_patch_id = num_params++] = i32;
+ params[si_shader_ctx->param_tes_patch_id = num_params++] = i32;
+ break;
+
case TGSI_PROCESSOR_GEOMETRY:
params[SI_PARAM_GS2VS_OFFSET] = i32;
params[SI_PARAM_GS_WAVE_ID] = i32;
@@ -2435,12 +3551,35 @@ static void create_function(struct si_shader_context *si_shader_ctx)
if (bld_base->info &&
(bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
- bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0))
- si_shader_ctx->ddxy_lds =
+ bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0 ||
+ bld_base->info->opcode_count[TGSI_OPCODE_DDX_FINE] > 0 ||
+ bld_base->info->opcode_count[TGSI_OPCODE_DDY_FINE] > 0 ||
+ bld_base->info->opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0 ||
+ bld_base->info->opcode_count[TGSI_OPCODE_INTERP_SAMPLE] > 0))
+ si_shader_ctx->lds =
LLVMAddGlobalInAddressSpace(gallivm->module,
LLVMArrayType(i32, 64),
"ddxy_lds",
LOCAL_ADDR_SPACE);
+
+ if ((si_shader_ctx->type == TGSI_PROCESSOR_VERTEX && shader->key.vs.as_ls) ||
+ si_shader_ctx->type == TGSI_PROCESSOR_TESS_CTRL ||
+ si_shader_ctx->type == TGSI_PROCESSOR_TESS_EVAL) {
+ /* This is the upper bound, maximum is 32 inputs times 32 vertices */
+ unsigned vertex_data_dw_size = 32*32*4;
+ unsigned patch_data_dw_size = 32*4;
+ /* The formula is: TCS inputs + TCS outputs + TCS patch outputs. */
+ unsigned patch_dw_size = vertex_data_dw_size*2 + patch_data_dw_size;
+ unsigned lds_dwords = patch_dw_size;
+
+ /* The actual size is computed outside of the shader to reduce
+ * the number of shader variants. */
+ si_shader_ctx->lds =
+ LLVMAddGlobalInAddressSpace(gallivm->module,
+ LLVMArrayType(i32, lds_dwords),
+ "tess_lds",
+ LOCAL_ADDR_SPACE);
+ }
}
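Worked out, the tessellation LDS upper bound above is: 32 vec4 outputs x 32 vertices x 4 dwords = 4096 dwords of per-vertex data, doubled for TCS inputs plus outputs, plus 32 x 4 = 128 dwords of per-patch data. A trivial C check:

#include <stdio.h>

int main(void)
{
	unsigned vertex_data_dw_size = 32 * 32 * 4; /* 4096 dwords */
	unsigned patch_data_dw_size  = 32 * 4;      /* 128 dwords */
	unsigned patch_dw_size = vertex_data_dw_size * 2 + patch_data_dw_size;

	/* Prints "8320 dwords = 33280 bytes". */
	printf("%u dwords = %u bytes\n", patch_dw_size, patch_dw_size * 4);
	return 0;
}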
static void preload_constants(struct si_shader_context *si_shader_ctx)
@@ -2517,9 +3656,13 @@ static void preload_streamout_buffers(struct si_shader_context *si_shader_ctx)
struct gallivm_state * gallivm = bld_base->base.gallivm;
unsigned i;
- if (si_shader_ctx->type != TGSI_PROCESSOR_VERTEX ||
- si_shader_ctx->shader->key.vs.as_es ||
- !si_shader_ctx->shader->selector->so.num_outputs)
+ /* Streamout can only be used if the shader is compiled as VS. */
+ if (!si_shader_ctx->shader->selector->so.num_outputs ||
+ (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX &&
+ (si_shader_ctx->shader->key.vs.as_es ||
+ si_shader_ctx->shader->key.vs.as_ls)) ||
+ (si_shader_ctx->type == TGSI_PROCESSOR_TESS_EVAL &&
+ si_shader_ctx->shader->key.tes.as_es))
return;
LLVMValueRef buf_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
@@ -2550,6 +3693,8 @@ static void preload_ring_buffers(struct si_shader_context *si_shader_ctx)
if ((si_shader_ctx->type == TGSI_PROCESSOR_VERTEX &&
si_shader_ctx->shader->key.vs.as_es) ||
+ (si_shader_ctx->type == TGSI_PROCESSOR_TESS_EVAL &&
+ si_shader_ctx->shader->key.tes.as_es) ||
si_shader_ctx->type == TGSI_PROCESSOR_GEOMETRY) {
LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_ESGS);
@@ -2557,13 +3702,21 @@ static void preload_ring_buffers(struct si_shader_context *si_shader_ctx)
build_indexed_load_const(si_shader_ctx, buf_ptr, offset);
}
- if (si_shader_ctx->type == TGSI_PROCESSOR_GEOMETRY ||
- si_shader_ctx->shader->is_gs_copy_shader) {
+ if (si_shader_ctx->shader->is_gs_copy_shader) {
LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_GSVS);
- si_shader_ctx->gsvs_ring =
+ si_shader_ctx->gsvs_ring[0] =
build_indexed_load_const(si_shader_ctx, buf_ptr, offset);
}
+ if (si_shader_ctx->type == TGSI_PROCESSOR_GEOMETRY) {
+ int i;
+ for (i = 0; i < 4; i++) {
+ LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_GSVS + i);
+
+ si_shader_ctx->gsvs_ring[i] =
+ build_indexed_load_const(si_shader_ctx, buf_ptr, offset);
+ }
+ }
}
void si_shader_binary_read_config(const struct si_screen *sscreen,
@@ -2637,26 +3790,54 @@ void si_shader_apply_scratch_relocs(struct si_context *sctx,
}
}
-int si_shader_binary_read(struct si_screen *sscreen,
- struct si_shader *shader,
- const struct radeon_shader_binary *binary)
+int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
{
+ const struct radeon_shader_binary *binary = &shader->binary;
+ unsigned code_size = binary->code_size + binary->rodata_size;
+ unsigned char *ptr;
+
+ r600_resource_reference(&shader->bo, NULL);
+ shader->bo = si_resource_create_custom(&sscreen->b.b,
+ PIPE_USAGE_IMMUTABLE,
+ code_size);
+ if (!shader->bo)
+ return -ENOMEM;
+ ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL,
+ PIPE_TRANSFER_READ_WRITE);
+ util_memcpy_cpu_to_le32(ptr, binary->code, binary->code_size);
+ if (binary->rodata_size > 0) {
+ ptr += binary->code_size;
+ util_memcpy_cpu_to_le32(ptr, binary->rodata,
+ binary->rodata_size);
+ }
+
+ sscreen->b.ws->buffer_unmap(shader->bo->cs_buf);
+ return 0;
+}
+
+int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader)
+{
+ const struct radeon_shader_binary *binary = &shader->binary;
unsigned i;
- unsigned code_size;
- unsigned char *ptr;
bool dump = r600_can_dump_shader(&sscreen->b,
shader->selector ? shader->selector->tokens : NULL);
si_shader_binary_read_config(sscreen, shader, 0);
+ si_shader_binary_upload(sscreen, shader);
if (dump) {
- if (!binary->disassembled) {
- fprintf(stderr, "SI CODE:\n");
- for (i = 0; i < binary->code_size; i+=4 ) {
- fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i, binary->code[i + 3],
- binary->code[i + 2], binary->code[i + 1],
- binary->code[i]);
+ if (!(sscreen->b.debug_flags & DBG_NO_ASM)) {
+ if (binary->disasm_string) {
+ fprintf(stderr, "\nShader Disassembly:\n\n");
+ fprintf(stderr, "%s\n", binary->disasm_string);
+ } else {
+ fprintf(stderr, "SI CODE:\n");
+ for (i = 0; i < binary->code_size; i+=4 ) {
+ fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i, binary->code[i + 3],
+ binary->code[i + 2], binary->code[i + 1],
+ binary->code[i]);
+ }
}
}
@@ -2666,26 +3847,6 @@ int si_shader_binary_read(struct si_screen *sscreen,
shader->num_sgprs, shader->num_vgprs, binary->code_size,
shader->lds_size, shader->scratch_bytes_per_wave);
}
-
- /* copy new shader */
- code_size = binary->code_size + binary->rodata_size;
- r600_resource_reference(&shader->bo, NULL);
- shader->bo = si_resource_create_custom(&sscreen->b.b, PIPE_USAGE_IMMUTABLE,
- code_size);
- if (shader->bo == NULL) {
- return -ENOMEM;
- }
-
-
- ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL, PIPE_TRANSFER_READ_WRITE);
- util_memcpy_cpu_to_le32(ptr, binary->code, binary->code_size);
- if (binary->rodata_size > 0) {
- ptr += binary->code_size;
- util_memcpy_cpu_to_le32(ptr, binary->rodata, binary->rodata_size);
- }
-
- sscreen->b.ws->buffer_unmap(shader->bo->cs_buf);
-
return 0;
}
@@ -2693,15 +3854,16 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
LLVMTargetMachineRef tm, LLVMModuleRef mod)
{
int r = 0;
- bool dump = r600_can_dump_shader(&sscreen->b,
- shader->selector ? shader->selector->tokens : NULL);
- r = radeon_llvm_compile(mod, &shader->binary,
- r600_get_llvm_processor_name(sscreen->b.family), dump, tm);
+ bool dump_asm = r600_can_dump_shader(&sscreen->b,
+ shader->selector ? shader->selector->tokens : NULL);
+ bool dump_ir = dump_asm && !(sscreen->b.debug_flags & DBG_NO_IR);
- if (r) {
+ r = radeon_llvm_compile(mod, &shader->binary,
+ r600_get_llvm_processor_name(sscreen->b.family), dump_ir, dump_asm, tm);
+ if (r)
return r;
- }
- r = si_shader_binary_read(sscreen, shader, &shader->binary);
+
+ r = si_shader_binary_read(sscreen, shader);
FREE(shader->binary.config);
FREE(shader->binary.rodata);
@@ -2709,7 +3871,8 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
if (shader->scratch_bytes_per_wave == 0) {
FREE(shader->binary.code);
FREE(shader->binary.relocs);
- memset(&shader->binary, 0, sizeof(shader->binary));
+ memset(&shader->binary, 0,
+ offsetof(struct radeon_shader_binary, disasm_string));
}
return r;
}
@@ -2741,7 +3904,7 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen,
preload_streamout_buffers(si_shader_ctx);
preload_ring_buffers(si_shader_ctx);
- args[0] = si_shader_ctx->gsvs_ring;
+ args[0] = si_shader_ctx->gsvs_ring[0];
args[1] = lp_build_mul_imm(uint,
LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
si_shader_ctx->param_vertex_id),
@@ -2767,7 +3930,7 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen,
outputs[i].values[chan] =
LLVMBuildBitCast(gallivm->builder,
- build_intrinsic(gallivm->builder,
+ lp_build_intrinsic(gallivm->builder,
"llvm.SI.buffer.load.dword.i32.i32",
LLVMInt32TypeInContext(gallivm->context),
args, 9,
@@ -2807,9 +3970,21 @@ static void si_dump_key(unsigned shader, union si_shader_key *key)
fprintf(stderr, "}\n");
if (key->vs.as_es)
- fprintf(stderr, " gs_used_inputs = 0x%"PRIx64"\n",
- key->vs.gs_used_inputs);
+ fprintf(stderr, " es_enabled_outputs = 0x%"PRIx64"\n",
+ key->vs.es_enabled_outputs);
fprintf(stderr, " as_es = %u\n", key->vs.as_es);
+ fprintf(stderr, " as_es = %u\n", key->vs.as_ls);
+ break;
+
+ case PIPE_SHADER_TESS_CTRL:
+ fprintf(stderr, " prim_mode = %u\n", key->tcs.prim_mode);
+ break;
+
+ case PIPE_SHADER_TESS_EVAL:
+ if (key->tes.as_es)
+ fprintf(stderr, " es_enabled_outputs = 0x%"PRIx64"\n",
+ key->tes.es_enabled_outputs);
+ fprintf(stderr, " as_es = %u\n", key->tes.as_es);
break;
case PIPE_SHADER_GEOMETRY:
@@ -2851,7 +4026,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
/* Dump TGSI code before doing TGSI->LLVM conversion in case the
* conversion fails. */
- if (dump) {
+ if (dump && !(sscreen->b.debug_flags & DBG_NO_TGSI)) {
si_dump_key(sel->type, &shader->key);
tgsi_dump(tokens, 0);
si_dump_streamout(&sel->so);
@@ -2873,6 +4048,10 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
bld_base->info = poly_stipple ? &stipple_shader_info : &sel->info;
bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
+ bld_base->op_actions[TGSI_OPCODE_INTERP_CENTROID] = interp_action;
+ bld_base->op_actions[TGSI_OPCODE_INTERP_SAMPLE] = interp_action;
+ bld_base->op_actions[TGSI_OPCODE_INTERP_OFFSET] = interp_action;
+
bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;
bld_base->op_actions[TGSI_OPCODE_TEX2] = tex_action;
bld_base->op_actions[TGSI_OPCODE_TXB] = tex_action;
@@ -2888,9 +4067,12 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy;
bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy;
+ bld_base->op_actions[TGSI_OPCODE_DDX_FINE].emit = si_llvm_emit_ddxy;
+ bld_base->op_actions[TGSI_OPCODE_DDY_FINE].emit = si_llvm_emit_ddxy;
bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex;
bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive;
+ bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;
if (HAVE_LLVM >= 0x0306) {
bld_base->op_actions[TGSI_OPCODE_MAX].emit = build_tgsi_intrinsic_nomem;
@@ -2908,11 +4090,25 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
switch (si_shader_ctx.type) {
case TGSI_PROCESSOR_VERTEX:
si_shader_ctx.radeon_bld.load_input = declare_input_vs;
- if (shader->key.vs.as_es) {
+ if (shader->key.vs.as_ls)
+ bld_base->emit_epilogue = si_llvm_emit_ls_epilogue;
+ else if (shader->key.vs.as_es)
bld_base->emit_epilogue = si_llvm_emit_es_epilogue;
- } else {
+ else
+ bld_base->emit_epilogue = si_llvm_emit_vs_epilogue;
+ break;
+ case TGSI_PROCESSOR_TESS_CTRL:
+ bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tcs;
+ bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = fetch_output_tcs;
+ bld_base->emit_store = store_output_tcs;
+ bld_base->emit_epilogue = si_llvm_emit_tcs_epilogue;
+ break;
+ case TGSI_PROCESSOR_TESS_EVAL:
+ bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tes;
+ if (shader->key.tes.as_es)
+ bld_base->emit_epilogue = si_llvm_emit_es_epilogue;
+ else
bld_base->emit_epilogue = si_llvm_emit_vs_epilogue;
- }
break;
case TGSI_PROCESSOR_GEOMETRY:
bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs;
@@ -2946,9 +4142,12 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
preload_ring_buffers(&si_shader_ctx);
if (si_shader_ctx.type == TGSI_PROCESSOR_GEOMETRY) {
- si_shader_ctx.gs_next_vertex =
- lp_build_alloca(bld_base->base.gallivm,
- bld_base->uint_bld.elem_type, "");
+ int i;
+ for (i = 0; i < 4; i++) {
+ si_shader_ctx.gs_next_vertex[i] =
+ lp_build_alloca(bld_base->base.gallivm,
+ bld_base->uint_bld.elem_type, "");
+ }
}
if (!lp_build_tgsi_llvm(bld_base, tokens)) {
@@ -3000,4 +4199,5 @@ void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader)
FREE(shader->binary.code);
FREE(shader->binary.relocs);
+ FREE(shader->binary.disasm_string);
}
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 51055afe36a..cd845c12e64 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -26,6 +26,46 @@
* Christian König <[email protected]>
*/
+/* How linking tessellation shader inputs and outputs works.
+ *
+ * Inputs and outputs between shaders are stored in a buffer. This buffer
+ * lives in LDS (typical case for tessellation), but it can also live
+ * in memory. Each input or output has a fixed location within a vertex.
+ * The highest used input or output determines the stride between vertices.
+ *
+ * Since tessellation is only enabled in the OpenGL core profile,
+ * only these semantics are valid for per-vertex data:
+ *
+ * Name          Location
+ *
+ * POSITION      0
+ * PSIZE         1
+ * CLIPDIST0..1  2..3
+ * CULLDIST0..1  (not implemented)
+ * GENERIC0..31  4..35
+ *
+ * For example, a shader only writing GENERIC0 has an output stride of 5.
+ *
+ * Only these semantics are valid for per-patch data:
+ *
+ * Name          Location
+ *
+ * TESSOUTER     0
+ * TESSINNER     1
+ * PATCH0..29    2..31
+ *
+ * That's how independent shaders agree on input and output locations.
+ * The si_shader_io_get_unique_index function assigns the locations.
+ *
+ * Other required information for calculating the input and output addresses
+ * like the vertex stride, the patch stride, and the offsets where per-vertex
+ * and per-patch data start, is passed to the shader via user data SGPRs.
+ * The offsets and strides are calculated at draw time and aren't available
+ * at compile time.
+ *
+ * The same approach should be used for linking ES->GS in the future.
+ */
+
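A sketch of the location assignment this comment describes. The enum names are hypothetical stand-ins for the TGSI semantics; the real mapping lives in si_shader_io_get_unique_index:

#include <assert.h>
#include <stdio.h>

enum { SEM_POSITION, SEM_PSIZE, SEM_CLIPDIST, SEM_GENERIC };

static unsigned io_unique_index(unsigned name, unsigned index)
{
	switch (name) {
	case SEM_POSITION: return 0;
	case SEM_PSIZE:    return 1;
	case SEM_CLIPDIST: assert(index <= 1);  return 2 + index;
	case SEM_GENERIC:  assert(index <= 31); return 4 + index;
	default:           assert(!"invalid semantic"); return 0;
	}
}

int main(void)
{
	/* A shader writing only GENERIC0 uses location 4, so its output
	 * stride is 4 + 1 = 5 vec4 slots, matching the example above. */
	printf("GENERIC0 -> location %u, stride %u\n",
	       io_unique_index(SEM_GENERIC, 0),
	       io_unique_index(SEM_GENERIC, 0) + 1);
	return 0;
}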
#ifndef SI_SHADER_H
#define SI_SHADER_H
@@ -43,9 +83,16 @@ struct radeon_shader_reloc;
#define SI_SGPR_VERTEX_BUFFER 8 /* VS only */
#define SI_SGPR_BASE_VERTEX 10 /* VS only */
#define SI_SGPR_START_INSTANCE 11 /* VS only */
+#define SI_SGPR_LS_OUT_LAYOUT 12 /* VS(LS) only */
+#define SI_SGPR_TCS_OUT_OFFSETS 8 /* TCS & TES only */
+#define SI_SGPR_TCS_OUT_LAYOUT 9 /* TCS & TES only */
+#define SI_SGPR_TCS_IN_LAYOUT 10 /* TCS only */
#define SI_SGPR_ALPHA_REF 8 /* PS only */
#define SI_VS_NUM_USER_SGPR 12
+#define SI_LS_NUM_USER_SGPR 13
+#define SI_TCS_NUM_USER_SGPR 11
+#define SI_TES_NUM_USER_SGPR 10
#define SI_GS_NUM_USER_SGPR 8
#define SI_GSCOPY_NUM_USER_SGPR 4
#define SI_PS_NUM_USER_SGPR 9
@@ -62,8 +109,30 @@ struct radeon_shader_reloc;
#define SI_PARAM_START_INSTANCE 6
/* the other VS parameters are assigned dynamically */
-/* ES only parameters */
-#define SI_PARAM_ES2GS_OFFSET 7
+/* Offsets where TCS outputs and TCS patch outputs live in LDS:
+ * [0:15] = TCS output patch0 offset / 16, max = NUM_PATCHES * 32 * 32
+ * [16:31] = TCS output patch0 offset for per-patch / 16, max = NUM_PATCHES*32*32 + 32*32
+ */
+#define SI_PARAM_TCS_OUT_OFFSETS 4 /* for TCS & TES */
+
+/* Layout of TCS outputs / TES inputs:
+ * [0:12] = stride between output patches in dwords, num_outputs * num_vertices * 4, max = 32*32*4
+ * [13:20] = stride between output vertices in dwords = num_outputs * 4, max = 32*4
+ * [26:31] = gl_PatchVerticesIn, max = 32
+ */
+#define SI_PARAM_TCS_OUT_LAYOUT 5 /* for TCS & TES */
+
+/* Layout of LS outputs / TCS inputs
+ * [0:12] = stride between patches in dwords = num_inputs * num_vertices * 4, max = 32*32*4
+ * [13:20] = stride between vertices in dwords = num_inputs * 4, max = 32*4
+ */
+#define SI_PARAM_TCS_IN_LAYOUT 6 /* TCS only */
+#define SI_PARAM_LS_OUT_LAYOUT 7 /* same value as TCS_IN_LAYOUT, LS only */
+
+/* TCS only parameters. */
+#define SI_PARAM_TESS_FACTOR_OFFSET 7
+#define SI_PARAM_PATCH_ID 8
+#define SI_PARAM_REL_IDS 9
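A sketch of packing and unpacking the TCS_OUT_LAYOUT user SGPR per the field positions documented above. The packing helper is hypothetical; the driver computes the real value at draw time:

#include <stdint.h>
#include <stdio.h>

static uint32_t pack_tcs_out_layout(unsigned patch_stride_dw,
                                    unsigned vertex_stride_dw,
                                    unsigned patch_vertices_in)
{
	return (patch_stride_dw   & 0x1FFF) |      /* bits [0:12]  */
	       ((vertex_stride_dw & 0xFF) << 13) | /* bits [13:20] */
	       ((patch_vertices_in & 0x3F) << 26); /* bits [26:31] */
}

int main(void)
{
	/* 16 vec4 outputs, 4 vertices per patch: vertex stride 16*4 = 64
	 * dwords, patch stride 64*4 = 256 dwords. */
	uint32_t v = pack_tcs_out_layout(256, 64, 4);
	printf("layout=0x%08x patch=%u vertex=%u verts=%u\n",
	       v, v & 0x1FFF, (v >> 13) & 0xFF, v >> 26);
	return 0;
}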
/* GS only parameters */
#define SI_PARAM_GS2VS_OFFSET 4
@@ -115,9 +184,25 @@ struct si_shader_selector {
unsigned gs_output_prim;
unsigned gs_max_out_vertices;
- uint64_t gs_used_inputs; /* mask of "get_unique_index" bits */
+ unsigned gs_num_invocations;
+
+ /* masks of "get_unique_index" bits */
+ uint64_t inputs_read;
+ uint64_t outputs_written;
+ uint32_t patch_outputs_written;
};
+/* Valid shader configurations:
+ *
+ * API shaders        VS | TCS | TES | GS |pass| PS
+ * are compiled as:      |     |     |    |thru|
+ *                       |     |     |    |    |
+ * Only VS & PS:      VS | --  | --  | -- | -- | PS
+ * With GS:           ES | --  | --  | GS | VS | PS
+ * With Tessel.:      LS | HS  | VS  | -- | -- | PS
+ * With both:         LS | HS  | ES  | GS | VS | PS
+ */
+
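The same table expressed as code (illustrative only; the driver encodes these configurations through the as_ls/as_es shader keys rather than a lookup like this):

#include <stdio.h>
#include <string.h>

static const char *hw_stage(const char *api, int has_tess, int has_gs)
{
	if (!strcmp(api, "VS"))
		return has_tess ? "LS" : (has_gs ? "ES" : "VS");
	if (!strcmp(api, "TCS"))
		return "HS";
	if (!strcmp(api, "TES"))
		return has_gs ? "ES" : "VS";
	return api; /* GS stays GS, PS stays PS */
}

int main(void)
{
	printf("VS  with tess+GS -> %s\n", hw_stage("VS", 1, 1));  /* LS */
	printf("TES with GS      -> %s\n", hw_stage("TES", 1, 1)); /* ES */
	printf("TES without GS   -> %s\n", hw_stage("TES", 1, 0)); /* VS */
	return 0;
}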
union si_shader_key {
struct {
unsigned export_16bpc:8;
@@ -130,11 +215,25 @@ union si_shader_key {
} ps;
struct {
unsigned instance_divisors[SI_NUM_VERTEX_BUFFERS];
- /* The mask of "get_unique_index" bits, needed for ES,
- * it describes how the ES->GS ring buffer is laid out. */
- uint64_t gs_used_inputs;
- unsigned as_es:1;
+ /* Mask of "get_unique_index" bits - which outputs are read
+ * by the next stage (needed by ES).
+ * This describes how outputs are laid out in memory. */
+ uint64_t es_enabled_outputs;
+ unsigned as_es:1; /* export shader */
+ unsigned as_ls:1; /* local shader */
+ unsigned export_prim_id; /* when PS needs it and GS is disabled */
} vs;
+ struct {
+ unsigned prim_mode:3;
+ } tcs; /* tessellation control shader */
+ struct {
+ /* Mask of "get_unique_index" bits - which outputs are read
+ * by the next stage (needed by ES).
+ * This describes how outputs are laid out in memory. */
+ uint64_t es_enabled_outputs;
+ unsigned as_es:1; /* export shader */
+ unsigned export_prim_id; /* when PS needs it and GS is disabled */
+ } tes; /* tessellation evaluation shader */
};
struct si_shader {
@@ -161,27 +260,47 @@ struct si_shader {
unsigned nparam;
unsigned vs_output_param_offset[PIPE_MAX_SHADER_OUTPUTS];
unsigned ps_input_param_offset[PIPE_MAX_SHADER_INPUTS];
-
+ unsigned ps_input_interpolate[PIPE_MAX_SHADER_INPUTS];
bool uses_instanceid;
unsigned nr_pos_exports;
+ unsigned nr_param_exports;
bool is_gs_copy_shader;
bool dx10_clamp_mode; /* convert NaNs to 0 */
+
+ unsigned ls_rsrc1;
+ unsigned ls_rsrc2;
};
static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx)
{
- return sctx->gs_shader ? &sctx->gs_shader->info
- : &sctx->vs_shader->info;
+ if (sctx->gs_shader)
+ return &sctx->gs_shader->info;
+ else if (sctx->tes_shader)
+ return &sctx->tes_shader->info;
+ else
+ return &sctx->vs_shader->info;
}
static inline struct si_shader* si_get_vs_state(struct si_context *sctx)
{
if (sctx->gs_shader)
return sctx->gs_shader->current->gs_copy_shader;
+ else if (sctx->tes_shader)
+ return sctx->tes_shader->current;
else
return sctx->vs_shader->current;
}
+static inline bool si_vs_exports_prim_id(struct si_shader *shader)
+{
+ if (shader->selector->type == PIPE_SHADER_VERTEX)
+ return shader->key.vs.export_prim_id;
+ else if (shader->selector->type == PIPE_SHADER_TESS_EVAL)
+ return shader->key.tes.export_prim_id;
+ else
+ return false;
+}
+
/* radeonsi_shader.c */
int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
struct si_shader *shader);
@@ -189,8 +308,8 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
LLVMTargetMachineRef tm, LLVMModuleRef mod);
void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader);
unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
-int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader,
- const struct radeon_shader_binary *binary);
+int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader);
+int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader);
void si_shader_apply_scratch_relocs(struct si_context *sctx,
struct si_shader *shader,
uint64_t scratch_va);
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 6c18836d189..c923ea7e154 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -61,7 +61,7 @@ unsigned si_array_mode(unsigned mode)
uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex)
{
- if (sscreen->b.chip_class == CIK &&
+ if (sscreen->b.chip_class >= CIK &&
sscreen->b.info.cik_macrotile_mode_array_valid) {
unsigned index, tileb;
@@ -489,11 +489,14 @@ static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) |
S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) |
S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) |
+ S_02881C_USE_VTX_VIEWPORT_INDX(info->writes_viewport_index) |
S_02881C_VS_OUT_CCDIST0_VEC_ENA((clipdist_mask & 0x0F) != 0) |
S_02881C_VS_OUT_CCDIST1_VEC_ENA((clipdist_mask & 0xF0) != 0) |
S_02881C_VS_OUT_MISC_VEC_ENA(info->writes_psize ||
info->writes_edgeflag ||
- info->writes_layer) |
+ info->writes_layer ||
+ info->writes_viewport_index) |
+ S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1) |
(sctx->queued.named.rasterizer->clip_plane_enable &
clipdist_mask));
r600_write_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
@@ -509,20 +512,26 @@ static void si_set_scissor_states(struct pipe_context *ctx,
const struct pipe_scissor_state *state)
{
struct si_context *sctx = (struct si_context *)ctx;
- struct si_state_scissor *scissor = CALLOC_STRUCT(si_state_scissor);
- struct si_pm4_state *pm4 = &scissor->pm4;
-
- if (scissor == NULL)
- return;
+ struct si_state_scissor *scissor;
+ struct si_pm4_state *pm4;
+ int i;
- scissor->scissor = *state;
- si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL,
- S_028250_TL_X(state->minx) | S_028250_TL_Y(state->miny) |
- S_028250_WINDOW_OFFSET_DISABLE(1));
- si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR,
- S_028254_BR_X(state->maxx) | S_028254_BR_Y(state->maxy));
+ for (i = start_slot; i < start_slot + num_scissors; i++) {
+ int idx = i - start_slot;
+ int offset = i * 4 * 2;
- si_pm4_set_state(sctx, scissor, scissor);
+ scissor = CALLOC_STRUCT(si_state_scissor);
+ if (scissor == NULL)
+ return;
+ pm4 = &scissor->pm4;
+ scissor->scissor = state[idx];
+ si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset,
+ S_028250_TL_X(state[idx].minx) | S_028250_TL_Y(state[idx].miny) |
+ S_028250_WINDOW_OFFSET_DISABLE(1));
+ si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR + offset,
+ S_028254_BR_X(state[idx].maxx) | S_028254_BR_Y(state[idx].maxy));
+ si_pm4_set_state(sctx, scissor[i], scissor);
+ }
}
static void si_set_viewport_states(struct pipe_context *ctx,
@@ -531,21 +540,29 @@ static void si_set_viewport_states(struct pipe_context *ctx,
const struct pipe_viewport_state *state)
{
struct si_context *sctx = (struct si_context *)ctx;
- struct si_state_viewport *viewport = CALLOC_STRUCT(si_state_viewport);
- struct si_pm4_state *pm4 = &viewport->pm4;
+ struct si_state_viewport *viewport;
+ struct si_pm4_state *pm4;
+ int i;
- if (viewport == NULL)
- return;
+ for (i = start_slot; i < start_slot + num_viewports; i++) {
+ int idx = i - start_slot;
+ int offset = i * 4 * 6;
- viewport->viewport = *state;
- si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]));
- si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]));
- si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]));
- si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]));
- si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]));
- si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]));
+ viewport = CALLOC_STRUCT(si_state_viewport);
+ if (!viewport)
+ return;
+ pm4 = &viewport->pm4;
+
+ viewport->viewport = state[idx];
+ si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE + offset, fui(state[idx].scale[0]));
+ si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET + offset, fui(state[idx].translate[0]));
+ si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE + offset, fui(state[idx].scale[1]));
+ si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET + offset, fui(state[idx].translate[1]));
+ si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE + offset, fui(state[idx].scale[2]));
+ si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET + offset, fui(state[idx].translate[2]));
- si_pm4_set_state(sctx, viewport, viewport);
+ si_pm4_set_state(sctx, viewport[i], viewport);
+ }
}
/*
@@ -649,7 +666,7 @@ static void *si_create_rs_state(struct pipe_context *ctx,
/* offset */
rs->offset_units = state->offset_units;
- rs->offset_scale = state->offset_scale * 12.0f;
+ rs->offset_scale = state->offset_scale * 16.0f;
si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0,
S_0286D4_FLAT_SHADE_ENA(1) |
@@ -718,12 +735,12 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state)
if (sctx->framebuffer.nr_samples > 1 &&
(!old_rs || old_rs->multisample_enable != rs->multisample_enable))
- sctx->db_render_state.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
si_pm4_bind_state(sctx, rasterizer, rs);
si_update_fb_rs_state(sctx);
- sctx->clip_regs.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->clip_regs);
}
static void si_delete_rs_state(struct pipe_context *ctx, void *state)
@@ -821,7 +838,8 @@ static void *si_create_dsa_state(struct pipe_context *ctx,
db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
- S_028800_ZFUNC(state->depth.func);
+ S_028800_ZFUNC(state->depth.func) |
+ S_028800_DEPTH_BOUNDS_ENABLE(state->depth.bounds_test);
/* stencil */
if (state->stencil[0].enabled) {
@@ -850,9 +868,12 @@ static void *si_create_dsa_state(struct pipe_context *ctx,
dsa->alpha_func = PIPE_FUNC_ALWAYS;
}
- /* misc */
si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control);
si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control);
+ if (state->depth.bounds_test) {
+ si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth.bounds_min));
+ si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth.bounds_max));
+ }
return dsa;
}
@@ -888,7 +909,7 @@ static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable)
{
struct si_context *sctx = (struct si_context*)ctx;
- sctx->db_render_state.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *state)
@@ -1157,7 +1178,9 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen,
int first_non_void)
{
struct si_screen *sscreen = (struct si_screen*)screen;
- bool enable_s3tc = sscreen->b.info.drm_minor >= 31;
+ bool enable_compressed_formats = (sscreen->b.info.drm_major == 2 &&
+ sscreen->b.info.drm_minor >= 31) ||
+ sscreen->b.info.drm_major == 3;
boolean uniform = TRUE;
int i;
@@ -1200,7 +1223,7 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen,
}
if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
- if (!enable_s3tc)
+ if (!enable_compressed_formats)
goto out_unknown;
switch (format) {
@@ -1220,7 +1243,7 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen,
}
if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
- if (!enable_s3tc)
+ if (!enable_compressed_formats)
goto out_unknown;
switch (format) {
@@ -1249,8 +1272,7 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen,
}
if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
-
- if (!enable_s3tc)
+ if (!enable_compressed_formats)
goto out_unknown;
if (!util_format_s3tc_enabled) {
@@ -1606,7 +1628,6 @@ boolean si_is_format_supported(struct pipe_screen *screen,
unsigned sample_count,
unsigned usage)
{
- struct si_screen *sscreen = (struct si_screen *)screen;
unsigned retval = 0;
if (target >= PIPE_MAX_TEXTURE_TYPES) {
@@ -1618,8 +1639,7 @@ boolean si_is_format_supported(struct pipe_screen *screen,
return FALSE;
if (sample_count > 1) {
- /* 2D tiling on CIK is supported since DRM 2.35.0 */
- if (sscreen->b.chip_class >= CIK && sscreen->b.info.drm_minor < 35)
+ if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE))
return FALSE;
switch (sample_count) {
@@ -1826,6 +1846,9 @@ static void si_initialize_color_surface(struct si_context *sctx,
surf->cb_color_info = color_info;
surf->cb_color_attrib = color_attrib;
+ if (sctx->b.chip_class >= VI)
+ surf->cb_dcc_control = S_028C78_OVERWRITE_COMBINER_DISABLE(1);
+
if (rtex->fmask.size) {
surf->cb_color_fmask = (offset + rtex->fmask.offset) >> 8;
surf->cb_color_fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max);
@@ -2023,7 +2046,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
util_format_is_pure_integer(state->cbufs[0]->format);
if (sctx->framebuffer.cb0_is_integer != old_cb0_is_integer)
- sctx->db_render_state.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
for (i = 0; i < state->nr_cbufs; i++) {
if (!state->cbufs[i])
@@ -2043,6 +2066,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
if (rtex->fmask.size && rtex->cmask.size) {
sctx->framebuffer.compressed_cb_mask |= 1 << i;
}
+ r600_context_add_resource_size(ctx, surf->base.texture);
}
/* Set the 16BPC export for possible dual-src blending. */
if (i == 1 && surf && surf->export_16bpc) {
@@ -2057,20 +2081,21 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
if (!surf->depth_initialized) {
si_init_depth_surface(sctx, surf);
}
+ r600_context_add_resource_size(ctx, surf->base.texture);
}
si_update_fb_rs_state(sctx);
si_update_fb_blend_state(sctx);
- sctx->framebuffer.atom.num_dw = state->nr_cbufs*15 + (8 - state->nr_cbufs)*3;
+ sctx->framebuffer.atom.num_dw = state->nr_cbufs*16 + (8 - state->nr_cbufs)*3;
sctx->framebuffer.atom.num_dw += state->zsbuf ? 26 : 4;
sctx->framebuffer.atom.num_dw += 3; /* WINDOW_SCISSOR_BR */
sctx->framebuffer.atom.num_dw += 18; /* MSAA sample locations */
- sctx->framebuffer.atom.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
if (sctx->framebuffer.nr_samples != old_nr_samples) {
- sctx->msaa_config.dirty = true;
- sctx->db_render_state.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->msaa_config);
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
/* Set sample locations as fragment shader constants. */
switch (sctx->framebuffer.nr_samples) {
@@ -2107,7 +2132,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
old_nr_samples != SI_NUM_SMOOTH_AA_SAMPLES) &&
(sctx->framebuffer.nr_samples != SI_NUM_SMOOTH_AA_SAMPLES ||
old_nr_samples != 1))
- sctx->msaa_sample_locs.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs);
}
}
@@ -2141,20 +2166,24 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
RADEON_PRIO_COLOR_META);
}
- r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 13);
+ r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
+ sctx->b.chip_class >= VI ? 14 : 13);
radeon_emit(cs, cb->cb_color_base); /* R_028C60_CB_COLOR0_BASE */
radeon_emit(cs, cb->cb_color_pitch); /* R_028C64_CB_COLOR0_PITCH */
radeon_emit(cs, cb->cb_color_slice); /* R_028C68_CB_COLOR0_SLICE */
radeon_emit(cs, cb->cb_color_view); /* R_028C6C_CB_COLOR0_VIEW */
radeon_emit(cs, cb->cb_color_info | tex->cb_color_info); /* R_028C70_CB_COLOR0_INFO */
radeon_emit(cs, cb->cb_color_attrib); /* R_028C74_CB_COLOR0_ATTRIB */
- radeon_emit(cs, 0); /* R_028C78 unused */
+ radeon_emit(cs, cb->cb_dcc_control); /* R_028C78_CB_COLOR0_DCC_CONTROL */
radeon_emit(cs, tex->cmask.base_address_reg); /* R_028C7C_CB_COLOR0_CMASK */
radeon_emit(cs, tex->cmask.slice_tile_max); /* R_028C80_CB_COLOR0_CMASK_SLICE */
radeon_emit(cs, cb->cb_color_fmask); /* R_028C84_CB_COLOR0_FMASK */
radeon_emit(cs, cb->cb_color_fmask_slice); /* R_028C88_CB_COLOR0_FMASK_SLICE */
radeon_emit(cs, tex->color_clear_value[0]); /* R_028C8C_CB_COLOR0_CLEAR_WORD0 */
radeon_emit(cs, tex->color_clear_value[1]); /* R_028C90_CB_COLOR0_CLEAR_WORD1 */
+
+ if (sctx->b.chip_class >= VI)
+ radeon_emit(cs, 0); /* R_028C94_CB_COLOR0_DCC_BASE */
}
/* set CB_COLOR1_INFO for possible dual-src blending */
if (i == 1 && state->cbufs[0]) {
@@ -2249,22 +2278,35 @@ static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
sctx->ps_iter_samples = min_samples;
if (sctx->framebuffer.nr_samples > 1)
- sctx->msaa_config.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->msaa_config);
}
/*
* Samplers
*/
-static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx,
- struct pipe_resource *texture,
- const struct pipe_sampler_view *state)
+/**
+ * Create a sampler view.
+ *
+ * @param ctx context
+ * @param texture texture
+ * @param state sampler view template
+ * @param width0	width0 override (for compressed textures viewed as int)
+ * @param height0	height0 override (for compressed textures viewed as int)
+ * @param force_level	set the base address to this mip level (for compressed textures)
+ */
+struct pipe_sampler_view *
+si_create_sampler_view_custom(struct pipe_context *ctx,
+ struct pipe_resource *texture,
+ const struct pipe_sampler_view *state,
+ unsigned width0, unsigned height0,
+ unsigned force_level)
{
struct si_context *sctx = (struct si_context*)ctx;
struct si_sampler_view *view = CALLOC_STRUCT(si_sampler_view);
struct r600_texture *tmp = (struct r600_texture*)texture;
const struct util_format_description *desc;
- unsigned format, num_format;
+ unsigned format, num_format, base_level, first_level, last_level;
uint32_t pitch = 0;
unsigned char state_swizzle[4], swizzle[4];
unsigned height, depth, width;
@@ -2297,7 +2339,7 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx
/* Buffer resource. */
if (texture->target == PIPE_BUFFER) {
- unsigned stride;
+ unsigned stride, num_records;
desc = util_format_description(state->format);
first_non_void = util_format_get_first_non_void_channel(state->format);
@@ -2306,10 +2348,16 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx
format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
+ num_records = state->u.buf.last_element + 1 - state->u.buf.first_element;
+ num_records = MIN2(num_records, texture->width0 / stride);
+
+ if (sctx->b.chip_class >= VI)
+ num_records *= stride;
+
view->state[4] = va;
view->state[5] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
S_008F04_STRIDE(stride);
- view->state[6] = state->u.buf.last_element + 1 - state->u.buf.first_element;
+ view->state[6] = num_records;
view->state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
@@ -2437,13 +2485,25 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx
format = 0;
}
- /* not supported any more */
- //endian = si_colorformat_endian_swap(format);
+ base_level = 0;
+ first_level = state->u.tex.first_level;
+ last_level = state->u.tex.last_level;
+ width = width0;
+ height = height0;
+ depth = texture->depth0;
- width = surflevel[0].npix_x;
- height = surflevel[0].npix_y;
- depth = surflevel[0].npix_z;
- pitch = surflevel[0].nblk_x * util_format_get_blockwidth(pipe_format);
+ if (force_level) {
+ assert(force_level == first_level &&
+ force_level == last_level);
+ base_level = force_level;
+ first_level = 0;
+ last_level = 0;
+ width = u_minify(width, force_level);
+ height = u_minify(height, force_level);
+ depth = u_minify(depth, force_level);
+ }
+
+ pitch = surflevel[base_level].nblk_x * util_format_get_blockwidth(pipe_format);
if (texture->target == PIPE_TEXTURE_1D_ARRAY) {
height = 1;
@@ -2453,8 +2513,7 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx
} else if (texture->target == PIPE_TEXTURE_CUBE_ARRAY)
depth = texture->array_size / 6;
- va = tmp->resource.gpu_address + surflevel[0].offset;
- va += tmp->mipmap_shift * surflevel[texture->last_level].slice_size * tmp->surface.array_size;
+ va = tmp->resource.gpu_address + surflevel[base_level].offset;
view->state[0] = va >> 8;
view->state[1] = (S_008F14_BASE_ADDRESS_HI(va >> 40) |
@@ -2467,11 +2526,11 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx
S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
S_008F1C_BASE_LEVEL(texture->nr_samples > 1 ?
- 0 : state->u.tex.first_level - tmp->mipmap_shift) |
+ 0 : first_level) |
S_008F1C_LAST_LEVEL(texture->nr_samples > 1 ?
util_logbase2(texture->nr_samples) :
- state->u.tex.last_level - tmp->mipmap_shift) |
- S_008F1C_TILING_INDEX(si_tile_mode_index(tmp, 0, false)) |
+ last_level) |
+ S_008F1C_TILING_INDEX(si_tile_mode_index(tmp, base_level, false)) |
S_008F1C_POW2_PAD(texture->last_level > 0) |
S_008F1C_TYPE(si_tex_dim(texture->target, texture->nr_samples)));
view->state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1));
@@ -2523,6 +2582,16 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx
return &view->base;
}
+static struct pipe_sampler_view *
+si_create_sampler_view(struct pipe_context *ctx,
+ struct pipe_resource *texture,
+ const struct pipe_sampler_view *state)
+{
+ return si_create_sampler_view_custom(ctx, texture, state,
+ texture ? texture->width0 : 0,
+ texture ? texture->height0 : 0, 0);
+}
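As a usage sketch (not part of the patch; the level value is hypothetical), a caller that wants a view locked to a single mip of a compressed texture would clamp the template to that level and pass it as force_level:

	/* Hypothetical caller: sample only mip level 2 of `texture`. */
	struct pipe_sampler_view templ = *state;  /* copy an existing template */
	templ.u.tex.first_level = 2;
	templ.u.tex.last_level = 2;

	view = si_create_sampler_view_custom(ctx, texture, &templ,
					     texture->width0, texture->height0,
					     2 /* force_level */);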
+
static void si_sampler_view_destroy(struct pipe_context *ctx,
struct pipe_sampler_view *state)
{
@@ -2765,6 +2834,7 @@ static void si_set_vertex_buffers(struct pipe_context *ctx,
pipe_resource_reference(&dsti->buffer, src->buffer);
dsti->buffer_offset = src->buffer_offset;
dsti->stride = src->stride;
+ r600_context_add_resource_size(ctx, src->buffer);
}
} else {
for (i = 0; i < count; i++) {
@@ -2782,6 +2852,7 @@ static void si_set_index_buffer(struct pipe_context *ctx,
if (ib) {
pipe_resource_reference(&sctx->index_buffer.buffer, ib->buffer);
memcpy(&sctx->index_buffer, ib, sizeof(*ib));
+ r600_context_add_resource_size(ctx, ib->buffer);
} else {
pipe_resource_reference(&sctx->index_buffer.buffer, NULL);
}
@@ -2845,6 +2916,30 @@ static void si_set_polygon_stipple(struct pipe_context *ctx,
}
}
+static void si_set_tess_state(struct pipe_context *ctx,
+ const float default_outer_level[4],
+ const float default_inner_level[2])
+{
+ struct si_context *sctx = (struct si_context *)ctx;
+ struct pipe_constant_buffer cb;
+ float array[8];
+
+ memcpy(array, default_outer_level, sizeof(float) * 4);
+ memcpy(array+4, default_inner_level, sizeof(float) * 2);
+
+ cb.buffer = NULL;
+ cb.user_buffer = NULL;
+ cb.buffer_size = sizeof(array);
+
+ si_upload_const_buffer(sctx, (struct r600_resource**)&cb.buffer,
+ (void*)array, sizeof(array),
+ &cb.buffer_offset);
+
+ ctx->set_constant_buffer(ctx, PIPE_SHADER_TESS_CTRL,
+ SI_DRIVER_STATE_CONST_BUF, &cb);
+ pipe_resource_reference(&cb.buffer, NULL);
+}
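For reference, a state tracker would feed the GL default tessellation levels through this hook roughly as follows (values hypothetical):

	static const float default_outer[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
	static const float default_inner[2] = { 1.0f, 1.0f };

	ctx->set_tess_state(ctx, default_outer, default_inner);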
+
static void si_texture_barrier(struct pipe_context *ctx)
{
struct si_context *sctx = (struct si_context *)ctx;
@@ -2870,6 +2965,8 @@ static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
si_need_cs_space((struct si_context*)ctx, num_dw, include_draw_vbo);
}
+static void si_init_config(struct si_context *sctx);
+
void si_init_state_functions(struct si_context *sctx)
{
si_init_atom(&sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state, 0);
@@ -2920,6 +3017,7 @@ void si_init_state_functions(struct si_context *sctx)
sctx->b.b.texture_barrier = si_texture_barrier;
sctx->b.b.set_polygon_stipple = si_set_polygon_stipple;
sctx->b.b.set_min_samples = si_set_min_samples;
+ sctx->b.b.set_tess_state = si_set_tess_state;
sctx->b.set_occlusion_query_state = si_set_occlusion_query_state;
sctx->b.need_gfx_cs_space = si_need_gfx_cs_space;
@@ -2931,24 +3029,31 @@ void si_init_state_functions(struct si_context *sctx)
} else {
sctx->b.dma_copy = si_dma_copy;
}
+
+ si_init_config(sctx);
}
static void
si_write_harvested_raster_configs(struct si_context *sctx,
struct si_pm4_state *pm4,
- unsigned raster_config)
+ unsigned raster_config,
+ unsigned raster_config_1)
{
unsigned sh_per_se = MAX2(sctx->screen->b.info.max_sh_per_se, 1);
unsigned num_se = MAX2(sctx->screen->b.info.max_se, 1);
unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
- unsigned num_rb = sctx->screen->b.info.r600_num_backends;
- unsigned rb_per_pkr = num_rb / num_se / sh_per_se;
+ unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16);
+ unsigned rb_per_pkr = MIN2(num_rb / num_se / sh_per_se, 2);
unsigned rb_per_se = num_rb / num_se;
- unsigned se0_mask = (1 << rb_per_se) - 1;
- unsigned se1_mask = se0_mask << rb_per_se;
+ unsigned se_mask[4];
unsigned se;
- assert(num_se == 1 || num_se == 2);
+ se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
+ se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
+ se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
+ se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
+
+ assert(num_se == 1 || num_se == 2 || num_se == 4);
assert(sh_per_se == 1 || sh_per_se == 2);
assert(rb_per_pkr == 1 || rb_per_pkr == 2);
@@ -2956,17 +3061,16 @@ si_write_harvested_raster_configs(struct si_context *sctx,
* fields are for, so I'm leaving them as their default
* values. */
- se0_mask &= rb_mask;
- se1_mask &= rb_mask;
- if (num_se == 2 && (!se0_mask || !se1_mask)) {
- raster_config &= C_028350_SE_MAP;
+ if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
+ (!se_mask[2] && !se_mask[3]))) {
+ raster_config_1 &= C_028354_SE_PAIR_MAP;
- if (!se0_mask) {
- raster_config |=
- S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3);
+ if (!se_mask[0] && !se_mask[1]) {
+ raster_config_1 |=
+ S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_3);
} else {
- raster_config |=
- S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0);
+ raster_config_1 |=
+ S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_0);
}
}
@@ -2974,10 +3078,23 @@ si_write_harvested_raster_configs(struct si_context *sctx,
unsigned raster_config_se = raster_config;
unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
+ int idx = (se / 2) * 2;
+
+ if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
+ raster_config_se &= C_028350_SE_MAP;
+
+ if (!se_mask[idx]) {
+ raster_config_se |=
+ S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3);
+ } else {
+ raster_config_se |=
+ S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0);
+ }
+ }
pkr0_mask &= rb_mask;
pkr1_mask &= rb_mask;
- if (sh_per_se == 2 && (!pkr0_mask || !pkr1_mask)) {
+ if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
raster_config_se &= C_028350_PKR_MAP;
if (!pkr0_mask) {
@@ -2989,7 +3106,7 @@ si_write_harvested_raster_configs(struct si_context *sctx,
}
}
- if (rb_per_pkr == 2) {
+ if (rb_per_se >= 2) {
unsigned rb0_mask = 1 << (se * rb_per_se);
unsigned rb1_mask = rb0_mask << 1;
@@ -3007,7 +3124,7 @@ si_write_harvested_raster_configs(struct si_context *sctx,
}
}
- if (sh_per_se == 2) {
+ if (rb_per_se > 2) {
rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
rb1_mask = rb0_mask << 1;
rb0_mask &= rb_mask;
@@ -3026,19 +3143,28 @@ si_write_harvested_raster_configs(struct si_context *sctx,
}
}
- si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
- SE_INDEX(se) | SH_BROADCAST_WRITES |
- INSTANCE_BROADCAST_WRITES);
+ /* GRBM_GFX_INDEX is privileged on VI */
+ if (sctx->b.chip_class <= CIK)
+ si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
+ SE_INDEX(se) | SH_BROADCAST_WRITES |
+ INSTANCE_BROADCAST_WRITES);
si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se);
+ if (sctx->b.chip_class >= CIK)
+ si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
}
- si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
- SE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
- INSTANCE_BROADCAST_WRITES);
+ /* GRBM_GFX_INDEX is privileged on VI */
+ if (sctx->b.chip_class <= CIK)
+ si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
+ SE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
+ INSTANCE_BROADCAST_WRITES);
}
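To sanity-check the se_mask derivation above, here is a standalone sketch with hypothetical values (8 RBs across 2 SEs, RB0 harvested, so rb_mask = 0xfe); it is illustrative only and not part of the patch:

	#include <stdio.h>

	int main(void)
	{
		unsigned rb_mask = 0xfe;    /* RB0 harvested */
		unsigned rb_per_se = 8 / 2; /* num_rb / num_se */
		unsigned se_mask[4];

		se_mask[0] = ((1u << rb_per_se) - 1) & rb_mask;   /* 0x0e */
		se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask; /* 0xe0 */
		se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask; /* 0x00 */
		se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask; /* 0x00 */

		/* Both SE masks are non-empty here, so neither the SE_MAP nor
		 * the SE_PAIR_MAP fallback above would trigger. */
		printf("se_mask: %02x %02x %02x %02x\n",
		       se_mask[0], se_mask[1], se_mask[2], se_mask[3]);
		return 0;
	}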
-void si_init_config(struct si_context *sctx)
+static void si_init_config(struct si_context *sctx)
{
+ unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16);
+ unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
+ unsigned raster_config, raster_config_1;
struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
if (pm4 == NULL)
@@ -3046,24 +3172,18 @@ void si_init_config(struct si_context *sctx)
si_cmd_context_control(pm4);
- si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0);
- si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0);
+ si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
+ si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
/* FIXME calculate these values somehow ??? */
si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, 0x80);
si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);
- si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, 0x0);
si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0);
si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
- si_pm4_set_reg(pm4, R_028B60_VGT_GS_VERT_ITEMSIZE_1, 0);
- si_pm4_set_reg(pm4, R_028B64_VGT_GS_VERT_ITEMSIZE_2, 0);
- si_pm4_set_reg(pm4, R_028B68_VGT_GS_VERT_ITEMSIZE_3, 0);
- si_pm4_set_reg(pm4, R_028B90_VGT_GS_INSTANCE_CNT, 0);
-
si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
si_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0);
si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
@@ -3076,62 +3196,78 @@ void si_init_config(struct si_context *sctx)
si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
- if (sctx->b.chip_class >= CIK) {
- switch (sctx->screen->b.family) {
- case CHIP_BONAIRE:
- si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x16000012);
- si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0);
- break;
- case CHIP_HAWAII:
- si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x3a00161a);
- si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x0000002e);
- break;
- case CHIP_KAVERI:
- /* XXX todo */
- case CHIP_KABINI:
- /* XXX todo */
- case CHIP_MULLINS:
- /* XXX todo */
- default:
- si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0);
- si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0);
- break;
- }
- } else {
- unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
- unsigned num_rb = sctx->screen->b.info.r600_num_backends;
- unsigned raster_config;
-
- switch (sctx->screen->b.family) {
- case CHIP_TAHITI:
- case CHIP_PITCAIRN:
- raster_config = 0x2a00126a;
- break;
- case CHIP_VERDE:
- raster_config = 0x0000124a;
- break;
- case CHIP_OLAND:
- raster_config = 0x00000082;
- break;
- case CHIP_HAINAN:
- raster_config = 0;
- break;
- default:
- fprintf(stderr,
- "radeonsi: Unknown GPU, using 0 for raster_config\n");
- raster_config = 0;
- break;
- }
+ switch (sctx->screen->b.family) {
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ raster_config = 0x2a00126a;
+ raster_config_1 = 0x00000000;
+ break;
+ case CHIP_VERDE:
+ raster_config = 0x0000124a;
+ raster_config_1 = 0x00000000;
+ break;
+ case CHIP_OLAND:
+ raster_config = 0x00000082;
+ raster_config_1 = 0x00000000;
+ break;
+ case CHIP_HAINAN:
+ raster_config = 0x00000000;
+ raster_config_1 = 0x00000000;
+ break;
+ case CHIP_BONAIRE:
+ raster_config = 0x16000012;
+ raster_config_1 = 0x00000000;
+ break;
+ case CHIP_HAWAII:
+ raster_config = 0x3a00161a;
+ raster_config_1 = 0x0000002e;
+ break;
+ case CHIP_FIJI:
+ /* Fiji should be the same as Hawaii, but that causes corruption in some cases */
+ raster_config = 0x16000012; /* 0x3a00161a */
+ raster_config_1 = 0x0000002a; /* 0x0000002e */
+ break;
+ case CHIP_TONGA:
+ raster_config = 0x16000012;
+ raster_config_1 = 0x0000002a;
+ break;
+ case CHIP_ICELAND:
+ raster_config = 0x00000002;
+ raster_config_1 = 0x00000000;
+ break;
+ case CHIP_CARRIZO:
+ raster_config = 0x00000002;
+ raster_config_1 = 0x00000000;
+ break;
+ case CHIP_KAVERI:
+ /* KV should be 0x00000002, but that causes problems with the radeon kernel driver */
+ raster_config = 0x00000000; /* 0x00000002 */
+ raster_config_1 = 0x00000000;
+ break;
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ raster_config = 0x00000000;
+ raster_config_1 = 0x00000000;
+ break;
+ default:
+ fprintf(stderr,
+ "radeonsi: Unknown GPU, using 0 for raster_config\n");
+ raster_config = 0x00000000;
+ raster_config_1 = 0x00000000;
+ break;
+ }
- /* Always use the default config when all backends are enabled
- * (or when we failed to determine the enabled backends).
- */
- if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
- si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG,
- raster_config);
- } else {
- si_write_harvested_raster_configs(sctx, pm4, raster_config);
- }
+ /* Always use the default config when all backends are enabled
+ * (or when we failed to determine the enabled backends).
+ */
+ if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
+ si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG,
+ raster_config);
+ if (sctx->b.chip_class >= CIK)
+ si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1,
+ raster_config_1);
+ } else {
+ si_write_harvested_raster_configs(sctx, pm4, raster_config, raster_config_1);
}
si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
@@ -3153,8 +3289,6 @@ void si_init_config(struct si_context *sctx)
si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, fui(1.0));
si_pm4_set_reg(pm4, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, fui(1.0));
si_pm4_set_reg(pm4, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, fui(1.0));
- si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, 0);
- si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, 0);
si_pm4_set_reg(pm4, R_028028_DB_STENCIL_CLEAR, 0);
si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
@@ -3173,10 +3307,21 @@ void si_init_config(struct si_context *sctx)
si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
if (sctx->b.chip_class >= CIK) {
+ si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xfffc));
+ si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
+ si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xfffe));
+ si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));
si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(0));
si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
}
+ if (sctx->b.chip_class >= VI) {
+ si_pm4_set_reg(pm4, R_028424_CB_DCC_CONTROL,
+ S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1));
+ si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30);
+ si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
+ }
+
sctx->init_config = pm4;
}
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 5e68b162137..b8f63c5dd36 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -30,6 +30,8 @@
#include "si_pm4.h"
#include "radeon/r600_pipe_common.h"
+#define SI_NUM_SHADERS (PIPE_SHADER_TESS_EVAL+1)
+
struct si_screen;
struct si_shader;
@@ -92,18 +94,21 @@ union si_state {
struct si_pm4_state *blend_color;
struct si_pm4_state *clip;
struct si_state_sample_mask *sample_mask;
- struct si_state_scissor *scissor;
- struct si_state_viewport *viewport;
+ struct si_state_scissor *scissor[16];
+ struct si_state_viewport *viewport[16];
struct si_state_rasterizer *rasterizer;
struct si_state_dsa *dsa;
struct si_pm4_state *fb_rs;
struct si_pm4_state *fb_blend;
struct si_pm4_state *dsa_stencil_ref;
struct si_pm4_state *ta_bordercolor_base;
+ struct si_pm4_state *ls;
+ struct si_pm4_state *hs;
struct si_pm4_state *es;
struct si_pm4_state *gs;
struct si_pm4_state *gs_rings;
- struct si_pm4_state *gs_onoff;
+ struct si_pm4_state *tf_ring;
+ struct si_pm4_state *vgt_shader_config;
struct si_pm4_state *vs;
struct si_pm4_state *ps;
struct si_pm4_state *spi;
@@ -111,6 +116,11 @@ union si_state {
struct si_pm4_state *array[0];
};
+struct si_shader_data {
+ struct r600_atom atom;
+ uint32_t sh_base[SI_NUM_SHADERS];
+};
+
#define SI_NUM_USER_SAMPLERS 16 /* AKA OpenGL textures units per shader */
#define SI_POLY_STIPPLE_SAMPLER SI_NUM_USER_SAMPLERS
#define SI_NUM_SAMPLERS (SI_POLY_STIPPLE_SAMPLER + 1)
@@ -135,68 +145,61 @@ union si_state {
* Ring buffers: 0..1
* Streamout buffers: 2..5
*/
-#define SI_RING_ESGS 0
-#define SI_RING_GSVS 1
-#define SI_NUM_RING_BUFFERS 2
+#define SI_RING_TESS_FACTOR 0 /* for HS (TCS) */
+#define SI_RING_ESGS 0 /* for ES, GS */
+#define SI_RING_GSVS 1 /* for GS, VS */
+#define SI_RING_GSVS_1 2 /* 1, 2, 3 for GS */
+#define SI_RING_GSVS_2 3
+#define SI_RING_GSVS_3 4
+#define SI_NUM_RING_BUFFERS 5
#define SI_SO_BUF_OFFSET SI_NUM_RING_BUFFERS
#define SI_NUM_RW_BUFFERS (SI_SO_BUF_OFFSET + 4)
#define SI_NUM_VERTEX_BUFFERS 16
-/* This represents resource descriptors in memory, such as buffer resources,
+/* This represents descriptors in memory, such as buffer resources,
* image resources, and sampler states.
*/
struct si_descriptors {
- struct r600_atom atom;
-
- /* The size of one resource descriptor. */
+ /* The list of descriptors in malloc'd memory. */
+ uint32_t *list;
+ /* The size of one descriptor. */
unsigned element_dw_size;
- /* The maximum number of resource descriptors. */
+ /* The maximum number of descriptors. */
unsigned num_elements;
+ /* Whether the list has been changed and should be re-uploaded. */
+ bool list_dirty;
- /* The buffer where resource descriptors are stored. */
+ /* The buffer where the descriptors have been uploaded. */
struct r600_resource *buffer;
unsigned buffer_offset;
- /* The i-th bit is set if that element is dirty (changed but not emitted). */
- uint64_t dirty_mask;
/* The i-th bit is set if that element is enabled (non-NULL resource). */
uint64_t enabled_mask;
- /* We can't update descriptors directly because the GPU might be
- * reading them at the same time, so we have to update them
- * in a copy-on-write manner. Each such copy is called a context,
- * which is just another array descriptors in the same buffer. */
- unsigned current_context_id;
- /* The size of a context, should be equal to 4*element_dw_size*num_elements. */
- unsigned context_size;
-
- /* The shader userdata register where the 64-bit pointer to the descriptor
+ /* The shader userdata offset within a shader where the 64-bit pointer to the descriptor
* array will be stored. */
- unsigned shader_userdata_reg;
+ unsigned shader_userdata_offset;
+ /* Whether the pointer should be re-emitted. */
+ bool pointer_dirty;
};
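A minimal sketch of the intended write path for the reworked struct (the helper name set_descriptor_element is hypothetical): element writes only touch the malloc'd list and set list_dirty; the upload into `buffer` then happens once, before the next draw.

	/* Hypothetical helper; assumes <string.h>. */
	static void set_descriptor_element(struct si_descriptors *desc,
					   unsigned i, const uint32_t *data)
	{
		memcpy(desc->list + i * desc->element_dw_size, data,
		       desc->element_dw_size * 4);
		desc->enabled_mask |= 1llu << i;
		desc->list_dirty = true; /* whole list re-uploaded before the next draw */
	}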
struct si_sampler_views {
struct si_descriptors desc;
struct pipe_sampler_view *views[SI_NUM_SAMPLER_VIEWS];
- uint32_t *desc_data[SI_NUM_SAMPLER_VIEWS];
};
struct si_sampler_states {
struct si_descriptors desc;
- uint32_t *desc_data[SI_NUM_SAMPLER_STATES];
void *saved_states[2]; /* saved for u_blitter */
};
struct si_buffer_resources {
struct si_descriptors desc;
- unsigned num_buffers;
enum radeon_bo_usage shader_usage; /* READ, WRITE, or READWRITE */
enum radeon_bo_priority priority;
struct pipe_resource **buffers; /* this has num_buffers elements */
- uint32_t *desc_storage; /* this has num_buffers*4 elements */
- uint32_t **desc_data; /* an array of pointers pointing to desc_storage */
};
#define si_pm4_block_idx(member) \
@@ -232,20 +235,18 @@ struct si_buffer_resources {
/* si_descriptors.c */
void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader,
unsigned start, unsigned count, void **states);
-void si_update_vertex_buffers(struct si_context *sctx);
void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
struct pipe_resource *buffer,
unsigned stride, unsigned num_records,
bool add_tid, bool swizzle,
- unsigned element_size, unsigned index_stride);
+ unsigned element_size, unsigned index_stride, uint64_t offset);
void si_init_all_descriptors(struct si_context *sctx);
+bool si_upload_shader_descriptors(struct si_context *sctx);
void si_release_all_descriptors(struct si_context *sctx);
void si_all_descriptors_begin_new_cs(struct si_context *sctx);
-void si_copy_buffer(struct si_context *sctx,
- struct pipe_resource *dst, struct pipe_resource *src,
- uint64_t dst_offset, uint64_t src_offset, unsigned size, bool is_framebuffer);
void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer,
const uint8_t *ptr, unsigned size, uint32_t *const_offset);
+void si_shader_change_notify(struct si_context *sctx);
/* si_state.c */
struct si_shader_selector;
@@ -256,7 +257,6 @@ boolean si_is_format_supported(struct pipe_screen *screen,
unsigned sample_count,
unsigned usage);
void si_init_state_functions(struct si_context *sctx);
-void si_init_config(struct si_context *sctx);
unsigned cik_bank_wh(unsigned bankwh);
unsigned cik_db_pipe_config(struct si_screen *sscreen, unsigned tile_mode);
unsigned cik_macro_tile_aspect(unsigned macro_tile_aspect);
@@ -264,6 +264,12 @@ unsigned cik_tile_split(unsigned tile_split);
unsigned si_array_mode(unsigned mode);
uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex);
unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil);
+struct pipe_sampler_view *
+si_create_sampler_view_custom(struct pipe_context *ctx,
+ struct pipe_resource *texture,
+ const struct pipe_sampler_view *state,
+ unsigned width0, unsigned height0,
+ unsigned force_level);
/* si_state_shader.c */
void si_update_shaders(struct si_context *sctx);
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 2e77d85a80d..4c21655596c 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -31,6 +31,7 @@
#include "util/u_index_modify.h"
#include "util/u_upload_mgr.h"
+#include "util/u_prim.h"
static void si_decompress_textures(struct si_context *sctx)
{
@@ -64,6 +65,7 @@ static unsigned si_conv_pipe_prim(unsigned mode)
[PIPE_PRIM_LINE_STRIP_ADJACENCY] = V_008958_DI_PT_LINESTRIP_ADJ,
[PIPE_PRIM_TRIANGLES_ADJACENCY] = V_008958_DI_PT_TRILIST_ADJ,
[PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = V_008958_DI_PT_TRISTRIP_ADJ,
+ [PIPE_PRIM_PATCHES] = V_008958_DI_PT_PATCH,
[R600_PRIM_RECTANGLE_LIST] = V_008958_DI_PT_RECTLIST
};
assert(mode < Elements(prim_conv));
@@ -87,6 +89,7 @@ static unsigned si_conv_prim_to_gs_out(unsigned mode)
[PIPE_PRIM_LINE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
[PIPE_PRIM_TRIANGLES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
[PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+ [PIPE_PRIM_PATCHES] = V_028A6C_OUTPRIM_TYPE_POINTLIST,
[R600_PRIM_RECTANGLE_LIST] = V_028A6C_OUTPRIM_TYPE_TRISTRIP
};
assert(mode < Elements(prim_conv));
@@ -94,8 +97,128 @@ static unsigned si_conv_prim_to_gs_out(unsigned mode)
return prim_conv[mode];
}
+/**
+ * This calculates the LDS size for tessellation shaders (VS, TCS, TES).
+ * LS.LDS_SIZE is shared by all 3 shader stages.
+ *
+ * The information about LDS and other non-compile-time parameters is then
+ * written to userdata SGPRs.
+ */
+static void si_emit_derived_tess_state(struct si_context *sctx,
+ const struct pipe_draw_info *info,
+ unsigned *num_patches)
+{
+ struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct si_shader_selector *ls = sctx->vs_shader;
+ /* The TES pointer will only be used for sctx->last_tcs.
+ * It would be wrong to think that TCS = TES. */
+ struct si_shader_selector *tcs =
+ sctx->tcs_shader ? sctx->tcs_shader : sctx->tes_shader;
+ unsigned tes_sh_base = sctx->shader_userdata.sh_base[PIPE_SHADER_TESS_EVAL];
+ unsigned num_tcs_input_cp = info->vertices_per_patch;
+ unsigned num_tcs_output_cp, num_tcs_inputs, num_tcs_outputs;
+ unsigned num_tcs_patch_outputs;
+ unsigned input_vertex_size, output_vertex_size, pervertex_output_patch_size;
+ unsigned input_patch_size, output_patch_size, output_patch0_offset;
+ unsigned perpatch_output_offset, lds_size, ls_rsrc2;
+ unsigned tcs_in_layout, tcs_out_layout, tcs_out_offsets;
+
+ *num_patches = 1; /* TODO: calculate this */
+
+ if (sctx->last_ls == ls->current &&
+ sctx->last_tcs == tcs &&
+ sctx->last_tes_sh_base == tes_sh_base &&
+ sctx->last_num_tcs_input_cp == num_tcs_input_cp)
+ return;
+
+ sctx->last_ls = ls->current;
+ sctx->last_tcs = tcs;
+ sctx->last_tes_sh_base = tes_sh_base;
+ sctx->last_num_tcs_input_cp = num_tcs_input_cp;
+
+ /* This calculates how shader inputs and outputs among VS, TCS, and TES
+ * are laid out in LDS. */
+ num_tcs_inputs = util_last_bit64(ls->outputs_written);
+
+ if (sctx->tcs_shader) {
+ num_tcs_outputs = util_last_bit64(tcs->outputs_written);
+ num_tcs_output_cp = tcs->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
+ num_tcs_patch_outputs = util_last_bit64(tcs->patch_outputs_written);
+ } else {
+ /* No TCS. Route varyings from LS to TES. */
+ num_tcs_outputs = num_tcs_inputs;
+ num_tcs_output_cp = num_tcs_input_cp;
+ num_tcs_patch_outputs = 2; /* TESSINNER + TESSOUTER */
+ }
+
+ input_vertex_size = num_tcs_inputs * 16;
+ output_vertex_size = num_tcs_outputs * 16;
+
+ input_patch_size = num_tcs_input_cp * input_vertex_size;
+
+ pervertex_output_patch_size = num_tcs_output_cp * output_vertex_size;
+ output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
+
+ output_patch0_offset = sctx->tcs_shader ? input_patch_size * *num_patches : 0;
+ perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size;
+
+ lds_size = output_patch0_offset + output_patch_size * *num_patches;
+ ls_rsrc2 = ls->current->ls_rsrc2;
+
+ if (sctx->b.chip_class >= CIK) {
+ assert(lds_size <= 65536);
+ ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 512) / 512);
+ } else {
+ assert(lds_size <= 32768);
+ ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 256) / 256);
+ }
+
+ /* Due to a hw bug, RSRC2_LS must be written twice with another
+ * LS register written in between. */
+ if (sctx->b.chip_class == CIK && sctx->b.family != CHIP_HAWAII)
+ si_write_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2);
+ si_write_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
+ radeon_emit(cs, ls->current->ls_rsrc1);
+ radeon_emit(cs, ls_rsrc2);
+
+ /* Compute userdata SGPRs. */
+ assert(((input_vertex_size / 4) & ~0xff) == 0);
+ assert(((output_vertex_size / 4) & ~0xff) == 0);
+ assert(((input_patch_size / 4) & ~0x1fff) == 0);
+ assert(((output_patch_size / 4) & ~0x1fff) == 0);
+ assert(((output_patch0_offset / 16) & ~0xffff) == 0);
+ assert(((perpatch_output_offset / 16) & ~0xffff) == 0);
+ assert(num_tcs_input_cp <= 32);
+ assert(num_tcs_output_cp <= 32);
+
+ tcs_in_layout = (input_patch_size / 4) |
+ ((input_vertex_size / 4) << 13);
+ tcs_out_layout = (output_patch_size / 4) |
+ ((output_vertex_size / 4) << 13);
+ tcs_out_offsets = (output_patch0_offset / 16) |
+ ((perpatch_output_offset / 16) << 16);
+
+ /* Set them for LS. */
+ si_write_sh_reg(cs,
+ R_00B530_SPI_SHADER_USER_DATA_LS_0 + SI_SGPR_LS_OUT_LAYOUT * 4,
+ tcs_in_layout);
+
+ /* Set them for TCS. */
+ si_write_sh_reg_seq(cs,
+ R_00B430_SPI_SHADER_USER_DATA_HS_0 + SI_SGPR_TCS_OUT_OFFSETS * 4, 3);
+ radeon_emit(cs, tcs_out_offsets);
+ radeon_emit(cs, tcs_out_layout | (num_tcs_input_cp << 26));
+ radeon_emit(cs, tcs_in_layout);
+
+ /* Set them for TES. */
+ si_write_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TCS_OUT_OFFSETS * 4, 2);
+ radeon_emit(cs, tcs_out_offsets);
+ radeon_emit(cs, tcs_out_layout | (num_tcs_output_cp << 26));
+}
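To make the LDS layout arithmetic above concrete, here is a standalone worked example with hypothetical shader counts (2 LS outputs, 3 input and output control points, 2 patch outputs, num_patches = 1); it mirrors the formulas in si_emit_derived_tess_state but is not part of the patch:

	#include <stdio.h>

	int main(void)
	{
		unsigned num_patches = 1;
		unsigned num_tcs_inputs = 2, num_tcs_input_cp = 3;  /* triangles */
		unsigned num_tcs_outputs = 2, num_tcs_output_cp = 3;
		unsigned num_tcs_patch_outputs = 2;                 /* inner + outer */

		unsigned input_vertex_size = num_tcs_inputs * 16;            /* 32 B */
		unsigned output_vertex_size = num_tcs_outputs * 16;          /* 32 B */
		unsigned input_patch_size =
			num_tcs_input_cp * input_vertex_size;                /* 96 B */
		unsigned pervertex_output_patch_size =
			num_tcs_output_cp * output_vertex_size;              /* 96 B */
		unsigned output_patch_size = pervertex_output_patch_size +
					     num_tcs_patch_outputs * 16;     /* 128 B */
		unsigned output_patch0_offset =
			input_patch_size * num_patches;                      /* 96 B */
		unsigned lds_size = output_patch0_offset +
				    output_patch_size * num_patches;         /* 224 B */

		/* On SI, LDS_SIZE is in 256-byte units: align(224, 256) / 256 = 1. */
		printf("lds_size = %u bytes\n", lds_size);
		return 0;
	}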
+
static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
- const struct pipe_draw_info *info)
+ const struct pipe_draw_info *info,
+ unsigned num_patches)
{
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
unsigned prim = info->mode;
@@ -104,11 +227,41 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
/* SWITCH_ON_EOP(0) is always preferable. */
bool wd_switch_on_eop = false;
bool ia_switch_on_eop = false;
+ bool ia_switch_on_eoi = false;
bool partial_vs_wave = false;
+ bool partial_es_wave = false;
if (sctx->gs_shader)
primgroup_size = 64; /* recommended with a GS */
+ if (sctx->tes_shader) {
+ unsigned num_cp_out =
+ sctx->tcs_shader ?
+ sctx->tcs_shader->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] :
+ info->vertices_per_patch;
+ unsigned max_size = 256 / MAX2(info->vertices_per_patch, num_cp_out);
+
+ primgroup_size = MIN2(primgroup_size, max_size);
+
+ /* primgroup_size must be set to a multiple of NUM_PATCHES */
+ primgroup_size = (primgroup_size / num_patches) * num_patches;
+
+ /* SWITCH_ON_EOI must be set if PrimID is used.
+ * If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */
+ if ((sctx->tcs_shader && sctx->tcs_shader->info.uses_primid) ||
+ sctx->tes_shader->info.uses_primid) {
+ ia_switch_on_eoi = true;
+ partial_es_wave = true;
+ }
+
+ /* Bug with tessellation and GS on Bonaire and older 2 SE chips. */
+ if ((sctx->b.family == CHIP_TAHITI ||
+ sctx->b.family == CHIP_PITCAIRN ||
+ sctx->b.family == CHIP_BONAIRE) &&
+ sctx->gs_shader)
+ partial_vs_wave = true;
+ }
+
/* This is a hardware requirement. */
if ((rs && rs->line_stipple_enable) ||
(sctx->b.screen->debug_flags & DBG_SWITCH_ON_EOP)) {
@@ -139,14 +292,52 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
(info->indirect || info->instance_count > 1))
wd_switch_on_eop = true;
+ /* USE_OPAQUE doesn't work when WD_SWITCH_ON_EOP is 0. */
+ if (info->count_from_stream_output)
+ wd_switch_on_eop = true;
+
/* If the WD switch is false, the IA switch must be false too. */
assert(wd_switch_on_eop || !ia_switch_on_eop);
}
+ /* Hw bug with single-primitive instances and SWITCH_ON_EOI
+ * on multi-SE chips. */
+ if (sctx->b.screen->info.max_se >= 2 && ia_switch_on_eoi &&
+ (info->indirect ||
+ (info->instance_count > 1 &&
+ u_prims_for_vertices(info->mode, info->count) <= 1)))
+ sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
+
+ /* Instancing bug on 2 SE chips. */
+ if (sctx->b.screen->info.max_se == 2 && ia_switch_on_eoi &&
+ (info->indirect || info->instance_count > 1))
+ partial_vs_wave = true;
+
return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) |
+ S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) |
S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
+ S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) |
S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1) |
- S_028AA8_WD_SWITCH_ON_EOP(sctx->b.chip_class >= CIK ? wd_switch_on_eop : 0);
+ S_028AA8_WD_SWITCH_ON_EOP(sctx->b.chip_class >= CIK ? wd_switch_on_eop : 0) |
+ S_028AA8_MAX_PRIMGRP_IN_WAVE(sctx->b.chip_class >= VI ? 2 : 0);
+}
+
+static unsigned si_get_ls_hs_config(struct si_context *sctx,
+ const struct pipe_draw_info *info,
+ unsigned num_patches)
+{
+ unsigned num_output_cp;
+
+ if (!sctx->tes_shader)
+ return 0;
+
+ num_output_cp = sctx->tcs_shader ?
+ sctx->tcs_shader->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] :
+ info->vertices_per_patch;
+
+ return S_028B58_NUM_PATCHES(num_patches) |
+ S_028B58_HS_NUM_INPUT_CP(info->vertices_per_patch) |
+ S_028B58_HS_NUM_OUTPUT_CP(num_output_cp);
}
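A short sketch of the two derived values above, with hypothetical inputs (primgroup_size starting at 64, num_patches = 6, triangle patches with 3 CPs in and out); it assumes the S_028B58_* macros from sid.h:

	unsigned primgroup_size = 64, num_patches = 6;

	/* Round down to a multiple of num_patches: (64 / 6) * 6 = 60. */
	primgroup_size = (primgroup_size / num_patches) * num_patches;

	unsigned ls_hs_config = S_028B58_NUM_PATCHES(num_patches) |
				S_028B58_HS_NUM_INPUT_CP(3) |
				S_028B58_HS_NUM_OUTPUT_CP(3);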
static void si_emit_scratch_reloc(struct si_context *sctx)
@@ -202,22 +393,31 @@ static void si_emit_draw_registers(struct si_context *sctx,
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
unsigned prim = si_conv_pipe_prim(info->mode);
unsigned gs_out_prim = si_conv_prim_to_gs_out(sctx->current_rast_prim);
- unsigned ia_multi_vgt_param = si_get_ia_multi_vgt_param(sctx, info);
+ unsigned ia_multi_vgt_param, ls_hs_config, num_patches = 0;
+
+ if (sctx->tes_shader)
+ si_emit_derived_tess_state(sctx, info, &num_patches);
+
+ ia_multi_vgt_param = si_get_ia_multi_vgt_param(sctx, info, num_patches);
+ ls_hs_config = si_get_ls_hs_config(sctx, info, num_patches);
/* Draw state. */
if (prim != sctx->last_prim ||
- ia_multi_vgt_param != sctx->last_multi_vgt_param) {
+ ia_multi_vgt_param != sctx->last_multi_vgt_param ||
+ ls_hs_config != sctx->last_ls_hs_config) {
if (sctx->b.chip_class >= CIK) {
radeon_emit(cs, PKT3(PKT3_DRAW_PREAMBLE, 2, 0));
radeon_emit(cs, prim); /* VGT_PRIMITIVE_TYPE */
radeon_emit(cs, ia_multi_vgt_param); /* IA_MULTI_VGT_PARAM */
- radeon_emit(cs, 0); /* VGT_LS_HS_CONFIG */
+ radeon_emit(cs, ls_hs_config); /* VGT_LS_HS_CONFIG */
} else {
r600_write_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE, prim);
r600_write_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
+ r600_write_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config);
}
sctx->last_prim = prim;
sctx->last_multi_vgt_param = ia_multi_vgt_param;
+ sctx->last_ls_hs_config = ls_hs_config;
}
if (gs_out_prim != sctx->last_gs_out_prim) {
@@ -245,8 +445,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
const struct pipe_index_buffer *ib)
{
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
- unsigned sh_base_reg = (sctx->gs_shader ? R_00B330_SPI_SHADER_USER_DATA_ES_0 :
- R_00B130_SPI_SHADER_USER_DATA_VS_0);
+ unsigned sh_base_reg = sctx->shader_userdata.sh_base[PIPE_SHADER_VERTEX];
if (info->count_from_stream_output) {
struct r600_so_target *t =
@@ -275,12 +474,24 @@ static void si_emit_draw_packets(struct si_context *sctx,
if (info->indexed) {
radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
- if (ib->index_size == 4) {
- radeon_emit(cs, V_028A7C_VGT_INDEX_32 | (SI_BIG_ENDIAN ?
- V_028A7C_VGT_DMA_SWAP_32_BIT : 0));
- } else {
- radeon_emit(cs, V_028A7C_VGT_INDEX_16 | (SI_BIG_ENDIAN ?
- V_028A7C_VGT_DMA_SWAP_16_BIT : 0));
+ /* index type */
+ switch (ib->index_size) {
+ case 1:
+ radeon_emit(cs, V_028A7C_VGT_INDEX_8);
+ break;
+ case 2:
+ radeon_emit(cs, V_028A7C_VGT_INDEX_16 |
+ (SI_BIG_ENDIAN && sctx->b.chip_class <= CIK ?
+ V_028A7C_VGT_DMA_SWAP_16_BIT : 0));
+ break;
+ case 4:
+ radeon_emit(cs, V_028A7C_VGT_INDEX_32 |
+ (SI_BIG_ENDIAN && sctx->b.chip_class <= CIK ?
+ V_028A7C_VGT_DMA_SWAP_32_BIT : 0));
+ break;
+ default:
+ assert(!"unreachable");
+ return;
}
}
@@ -406,9 +617,14 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato
if (sctx->flags & SI_CONTEXT_INV_TC_L1)
cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1);
- if (sctx->flags & SI_CONTEXT_INV_TC_L2)
+ if (sctx->flags & SI_CONTEXT_INV_TC_L2) {
cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1);
+ /* TODO: this might not be needed. */
+ if (sctx->chip_class >= VI)
+ cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1);
+ }
+
if (sctx->flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) |
S_0085F0_CB0_DEST_BASE_ENA(1) |
@@ -520,8 +736,14 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
(info->indexed || !info->count_from_stream_output))
return;
- if (!sctx->ps_shader || !sctx->vs_shader)
+ if (!sctx->ps_shader || !sctx->vs_shader) {
+ assert(0);
return;
+ }
+ if (!!sctx->tes_shader != (info->mode == PIPE_PRIM_PATCHES)) {
+ assert(0);
+ return;
+ }
si_decompress_textures(sctx);
@@ -532,15 +754,15 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
* current_rast_prim for this draw_vbo call. */
if (sctx->gs_shader)
sctx->current_rast_prim = sctx->gs_shader->gs_output_prim;
+ else if (sctx->tes_shader)
+ sctx->current_rast_prim =
+ sctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
else
sctx->current_rast_prim = info->mode;
si_update_shaders(sctx);
-
- if (sctx->vertex_buffers_dirty) {
- si_update_vertex_buffers(sctx);
- sctx->vertex_buffers_dirty = false;
- }
+ if (!si_upload_shader_descriptors(sctx))
+ return;
if (info->indexed) {
/* Initialize the index buffer struct. */
@@ -550,7 +772,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
ib.offset = sctx->index_buffer.offset;
/* Translate or upload, if needed. */
- if (ib.index_size == 1) {
+ /* 8-bit indices are supported on VI. */
+ if (sctx->b.chip_class <= CIK && ib.index_size == 1) {
struct pipe_resource *out_buffer = NULL;
unsigned out_offset, start, count, start_offset;
void *ptr;
@@ -585,6 +808,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
}
}
+ /* TODO: VI should read index buffers through TC, so this shouldn't be
+ * needed on VI. */
if (info->indexed && r600_resource(ib.buffer)->TC_L2_dirty) {
sctx->b.flags |= SI_CONTEXT_INV_TC_L2;
r600_resource(ib.buffer)->TC_L2_dirty = false;
@@ -592,7 +817,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
/* Check flush flags. */
if (sctx->b.flags)
- sctx->atoms.s.cache_flush->dirty = true;
+ si_mark_atom_dirty(sctx, sctx->atoms.s.cache_flush);
si_need_cs_space(sctx, 0, TRUE);
@@ -618,7 +843,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
/* Workaround for a VGT hang when streamout is enabled.
* It must be done after drawing. */
- if (sctx->b.family == CHIP_HAWAII &&
+ if ((sctx->b.family == CHIP_HAWAII || sctx->b.family == CHIP_TONGA) &&
(sctx->b.streamout.streamout_enabled ||
sctx->b.streamout.prims_gen_query_enabled)) {
sctx->b.flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 208c8523ef1..0347014948d 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -30,9 +30,135 @@
#include "sid.h"
#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_ureg.h"
#include "util/u_memory.h"
#include "util/u_simple_shaders.h"
+static void si_set_tesseval_regs(struct si_shader *shader,
+ struct si_pm4_state *pm4)
+{
+ struct tgsi_shader_info *info = &shader->selector->info;
+ unsigned tes_prim_mode = info->properties[TGSI_PROPERTY_TES_PRIM_MODE];
+ unsigned tes_spacing = info->properties[TGSI_PROPERTY_TES_SPACING];
+ bool tes_vertex_order_cw = info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW];
+ bool tes_point_mode = info->properties[TGSI_PROPERTY_TES_POINT_MODE];
+ unsigned type, partitioning, topology;
+
+ switch (tes_prim_mode) {
+ case PIPE_PRIM_LINES:
+ type = V_028B6C_TESS_ISOLINE;
+ break;
+ case PIPE_PRIM_TRIANGLES:
+ type = V_028B6C_TESS_TRIANGLE;
+ break;
+ case PIPE_PRIM_QUADS:
+ type = V_028B6C_TESS_QUAD;
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ switch (tes_spacing) {
+ case PIPE_TESS_SPACING_FRACTIONAL_ODD:
+ partitioning = V_028B6C_PART_FRAC_ODD;
+ break;
+ case PIPE_TESS_SPACING_FRACTIONAL_EVEN:
+ partitioning = V_028B6C_PART_FRAC_EVEN;
+ break;
+ case PIPE_TESS_SPACING_EQUAL:
+ partitioning = V_028B6C_PART_INTEGER;
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ if (tes_point_mode)
+ topology = V_028B6C_OUTPUT_POINT;
+ else if (tes_prim_mode == PIPE_PRIM_LINES)
+ topology = V_028B6C_OUTPUT_LINE;
+ else if (tes_vertex_order_cw)
+ /* for some reason, this must be the other way around */
+ topology = V_028B6C_OUTPUT_TRIANGLE_CCW;
+ else
+ topology = V_028B6C_OUTPUT_TRIANGLE_CW;
+
+ si_pm4_set_reg(pm4, R_028B6C_VGT_TF_PARAM,
+ S_028B6C_TYPE(type) |
+ S_028B6C_PARTITIONING(partitioning) |
+ S_028B6C_TOPOLOGY(topology));
+}
+
+static void si_shader_ls(struct si_shader *shader)
+{
+ struct si_pm4_state *pm4;
+ unsigned num_sgprs, num_user_sgprs;
+ unsigned vgpr_comp_cnt;
+ uint64_t va;
+
+ pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
+ if (pm4 == NULL)
+ return;
+
+ va = shader->bo->gpu_address;
+ si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+
+ /* We need at least 2 components for LS.
+ * VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */
+ vgpr_comp_cnt = shader->uses_instanceid ? 3 : 1;
+
+ num_user_sgprs = SI_LS_NUM_USER_SGPR;
+ num_sgprs = shader->num_sgprs;
+ if (num_user_sgprs > num_sgprs) {
+ /* Last 2 reserved SGPRs are used for VCC */
+ num_sgprs = num_user_sgprs + 2;
+ }
+ assert(num_sgprs <= 104);
+
+ si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
+ si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, va >> 40);
+
+ shader->ls_rsrc1 = S_00B528_VGPRS((shader->num_vgprs - 1) / 4) |
+ S_00B528_SGPRS((num_sgprs - 1) / 8) |
+ S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt);
+ shader->ls_rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs) |
+ S_00B52C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0);
+}
+
+static void si_shader_hs(struct si_shader *shader)
+{
+ struct si_pm4_state *pm4;
+ unsigned num_sgprs, num_user_sgprs;
+ uint64_t va;
+
+ pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
+ if (pm4 == NULL)
+ return;
+
+ va = shader->bo->gpu_address;
+ si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+
+ num_user_sgprs = SI_TCS_NUM_USER_SGPR;
+ num_sgprs = shader->num_sgprs;
+ /* One SGPR after the user SGPRs is pre-loaded with the tessellation
+ * factor buffer offset. */
+ if ((num_user_sgprs + 1) > num_sgprs) {
+ /* Last 2 reserved SGPRs are used for VCC */
+ num_sgprs = num_user_sgprs + 1 + 2;
+ }
+ assert(num_sgprs <= 104);
+
+ si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
+ si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40);
+ si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
+ S_00B428_VGPRS((shader->num_vgprs - 1) / 4) |
+ S_00B428_SGPRS((num_sgprs - 1) / 8));
+ si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
+ S_00B42C_USER_SGPR(num_user_sgprs) |
+ S_00B42C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
+}
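Worked example of the SGPR accounting above, with hypothetical counts (10 user SGPRs, a shader that only declared 8):

	unsigned num_user_sgprs = 10, num_sgprs = 8;

	/* The TF-buffer-offset SGPR plus the 2 VCC SGPRs raise the total to 13. */
	if (num_user_sgprs + 1 > num_sgprs)
		num_sgprs = num_user_sgprs + 1 + 2;

	/* RSRC1 encodes SGPRs in units of 8: (13 - 1) / 8 = 1, i.e. 16 allocated. */
	unsigned rsrc1_sgprs = (num_sgprs - 1) / 8;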
+
static void si_shader_es(struct si_shader *shader)
{
struct si_pm4_state *pm4;
@@ -48,9 +174,15 @@ static void si_shader_es(struct si_shader *shader)
va = shader->bo->gpu_address;
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
- vgpr_comp_cnt = shader->uses_instanceid ? 3 : 0;
+ if (shader->selector->type == PIPE_SHADER_VERTEX) {
+ vgpr_comp_cnt = shader->uses_instanceid ? 3 : 0;
+ num_user_sgprs = SI_VS_NUM_USER_SGPR;
+ } else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
+ vgpr_comp_cnt = 3; /* all components are needed for TES */
+ num_user_sgprs = SI_TES_NUM_USER_SGPR;
+ } else
+ assert(0);
- num_user_sgprs = SI_VS_NUM_USER_SGPR;
num_sgprs = shader->num_sgprs;
/* One SGPR after user SGPRs is pre-loaded with es2gs_offset */
if ((num_user_sgprs + 1) > num_sgprs) {
@@ -69,17 +201,37 @@ static void si_shader_es(struct si_shader *shader)
si_pm4_set_reg(pm4, R_00B32C_SPI_SHADER_PGM_RSRC2_ES,
S_00B32C_USER_SGPR(num_user_sgprs) |
S_00B32C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
+
+ if (shader->selector->type == PIPE_SHADER_TESS_EVAL)
+ si_set_tesseval_regs(shader, pm4);
+}
+
+static unsigned si_gs_get_max_stream(struct si_shader *shader)
+{
+ struct pipe_stream_output_info *so = &shader->selector->so;
+ unsigned max_stream = 0, i;
+
+ if (so->num_outputs == 0)
+ return 0;
+
+ for (i = 0; i < so->num_outputs; i++) {
+ if (so->output[i].stream > max_stream)
+ max_stream = so->output[i].stream;
+ }
+ return max_stream;
}
static void si_shader_gs(struct si_shader *shader)
{
- unsigned gs_vert_itemsize = shader->selector->info.num_outputs * (16 >> 2);
+ unsigned gs_vert_itemsize = shader->selector->info.num_outputs * 16;
unsigned gs_max_vert_out = shader->selector->gs_max_out_vertices;
- unsigned gsvs_itemsize = gs_vert_itemsize * gs_max_vert_out;
+ unsigned gsvs_itemsize = (gs_vert_itemsize * gs_max_vert_out) >> 2;
+ unsigned gs_num_invocations = shader->selector->gs_num_invocations;
unsigned cut_mode;
struct si_pm4_state *pm4;
unsigned num_sgprs, num_user_sgprs;
uint64_t va;
+ unsigned max_stream = si_gs_get_max_stream(shader);
/* The GSVS_RING_ITEMSIZE register takes 15 bits */
assert(gsvs_itemsize < (1 << 15));
@@ -107,16 +259,23 @@ static void si_shader_gs(struct si_shader *shader)
S_028A40_GS_WRITE_OPTIMIZE(1));
si_pm4_set_reg(pm4, R_028A60_VGT_GSVS_RING_OFFSET_1, gsvs_itemsize);
- si_pm4_set_reg(pm4, R_028A64_VGT_GSVS_RING_OFFSET_2, gsvs_itemsize);
- si_pm4_set_reg(pm4, R_028A68_VGT_GSVS_RING_OFFSET_3, gsvs_itemsize);
+ si_pm4_set_reg(pm4, R_028A64_VGT_GSVS_RING_OFFSET_2, gsvs_itemsize * ((max_stream >= 2) ? 2 : 1));
+ si_pm4_set_reg(pm4, R_028A68_VGT_GSVS_RING_OFFSET_3, gsvs_itemsize * ((max_stream >= 3) ? 3 : 1));
si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
- util_bitcount64(shader->selector->gs_used_inputs) * (16 >> 2));
- si_pm4_set_reg(pm4, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize);
+ util_bitcount64(shader->selector->inputs_read) * (16 >> 2));
+ si_pm4_set_reg(pm4, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize * (max_stream + 1));
si_pm4_set_reg(pm4, R_028B38_VGT_GS_MAX_VERT_OUT, gs_max_vert_out);
- si_pm4_set_reg(pm4, R_028B5C_VGT_GS_VERT_ITEMSIZE, gs_vert_itemsize);
+ si_pm4_set_reg(pm4, R_028B5C_VGT_GS_VERT_ITEMSIZE, gs_vert_itemsize >> 2);
+ si_pm4_set_reg(pm4, R_028B60_VGT_GS_VERT_ITEMSIZE_1, (max_stream >= 1) ? gs_vert_itemsize >> 2 : 0);
+ si_pm4_set_reg(pm4, R_028B64_VGT_GS_VERT_ITEMSIZE_2, (max_stream >= 2) ? gs_vert_itemsize >> 2 : 0);
+ si_pm4_set_reg(pm4, R_028B68_VGT_GS_VERT_ITEMSIZE_3, (max_stream >= 3) ? gs_vert_itemsize >> 2 : 0);
+
+ si_pm4_set_reg(pm4, R_028B90_VGT_GS_INSTANCE_CNT,
+ S_028B90_CNT(MIN2(gs_num_invocations, 127)) |
+ S_028B90_ENABLE(gs_num_invocations > 0));
va = shader->bo->gpu_address;
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
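To see how the per-stream ring sizing above scales, take a hypothetical GS with 4 outputs (so gs_vert_itemsize = 64 bytes), gs_max_vert_out = 4, and all four streams in use (max_stream = 3):

	unsigned gs_vert_itemsize = 4 * 16;                   /* 64 bytes */
	unsigned gsvs_itemsize = (gs_vert_itemsize * 4) >> 2; /* 64 dwords */
	unsigned max_stream = 3;

	/* RING_OFFSET_1/2/3 become 64, 128 and 192 dwords, and the total
	 * GSVS itemsize covers all four streams: 64 * (3 + 1) = 256 dwords. */
	unsigned ring_itemsize = gsvs_itemsize * (max_stream + 1);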
@@ -143,19 +302,29 @@ static void si_shader_gs(struct si_shader *shader)
static void si_shader_vs(struct si_shader *shader)
{
- struct tgsi_shader_info *info = &shader->selector->info;
struct si_pm4_state *pm4;
unsigned num_sgprs, num_user_sgprs;
- unsigned nparams, i, vgpr_comp_cnt;
+ unsigned nparams, vgpr_comp_cnt;
uint64_t va;
unsigned window_space =
shader->selector->info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
+ bool enable_prim_id = si_vs_exports_prim_id(shader);
pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
if (pm4 == NULL)
return;
+ /* If this is the GS copy shader, the GS state writes this register.
+ * Otherwise, the VS state writes it.
+ */
+ if (!shader->is_gs_copy_shader) {
+ si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE,
+ S_028A40_MODE(enable_prim_id ? V_028A40_GS_SCENARIO_A : 0));
+ si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, enable_prim_id);
+ } else
+ si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, 0);
+
va = shader->bo->gpu_address;
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
@@ -163,8 +332,11 @@ static void si_shader_vs(struct si_shader *shader)
vgpr_comp_cnt = 0; /* only VertexID is needed for GS-COPY. */
num_user_sgprs = SI_GSCOPY_NUM_USER_SGPR;
} else if (shader->selector->type == PIPE_SHADER_VERTEX) {
- vgpr_comp_cnt = shader->uses_instanceid ? 3 : 0;
+ vgpr_comp_cnt = shader->uses_instanceid ? 3 : (enable_prim_id ? 2 : 0);
num_user_sgprs = SI_VS_NUM_USER_SGPR;
+ } else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
+ vgpr_comp_cnt = 3; /* all components are needed for TES */
+ num_user_sgprs = SI_TES_NUM_USER_SGPR;
} else
assert(0);
@@ -175,28 +347,8 @@ static void si_shader_vs(struct si_shader *shader)
}
assert(num_sgprs <= 104);
- /* Certain attributes (position, psize, etc.) don't count as params.
- * VS is required to export at least one param and r600_shader_from_tgsi()
- * takes care of adding a dummy export.
- */
- for (nparams = 0, i = 0 ; i < info->num_outputs; i++) {
- switch (info->output_semantic_name[i]) {
- case TGSI_SEMANTIC_CLIPVERTEX:
- case TGSI_SEMANTIC_CLIPDIST:
- case TGSI_SEMANTIC_CULLDIST:
- case TGSI_SEMANTIC_POSITION:
- case TGSI_SEMANTIC_PSIZE:
- case TGSI_SEMANTIC_EDGEFLAG:
- case TGSI_SEMANTIC_VIEWPORT_INDEX:
- case TGSI_SEMANTIC_LAYER:
- break;
- default:
- nparams++;
- }
- }
- if (nparams < 1)
- nparams = 1;
-
+ /* VS is required to export at least one param. */
+ nparams = MAX2(shader->nr_param_exports, 1);
si_pm4_set_reg(pm4, R_0286C4_SPI_VS_OUT_CONFIG,
S_0286C4_VS_EXPORT_COUNT(nparams - 1));
@@ -236,6 +388,9 @@ static void si_shader_vs(struct si_shader *shader)
S_028818_VPORT_X_SCALE_ENA(1) | S_028818_VPORT_X_OFFSET_ENA(1) |
S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) |
S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1));
+
+ if (shader->selector->type == PIPE_SHADER_TESS_EVAL)
+ si_set_tesseval_regs(shader, pm4);
}
static void si_shader_ps(struct si_shader *shader)
@@ -333,7 +488,18 @@ static void si_shader_init_pm4_state(struct si_shader *shader)
switch (shader->selector->type) {
case PIPE_SHADER_VERTEX:
- if (shader->key.vs.as_es)
+ if (shader->key.vs.as_ls)
+ si_shader_ls(shader);
+ else if (shader->key.vs.as_es)
+ si_shader_es(shader);
+ else
+ si_shader_vs(shader);
+ break;
+ case PIPE_SHADER_TESS_CTRL:
+ si_shader_hs(shader);
+ break;
+ case PIPE_SHADER_TESS_EVAL:
+ if (shader->key.tes.as_es)
si_shader_es(shader);
else
si_shader_vs(shader);
@@ -351,7 +517,7 @@ static void si_shader_init_pm4_state(struct si_shader *shader)
}
/* Compute the key for the hw shader variant */
-static INLINE void si_shader_selector_key(struct pipe_context *ctx,
+static inline void si_shader_selector_key(struct pipe_context *ctx,
struct si_shader_selector *sel,
union si_shader_key *key)
{
@@ -367,10 +533,27 @@ static INLINE void si_shader_selector_key(struct pipe_context *ctx,
key->vs.instance_divisors[i] =
sctx->vertex_elements->elements[i].instance_divisor;
- if (sctx->gs_shader) {
+ if (sctx->tes_shader)
+ key->vs.as_ls = 1;
+ else if (sctx->gs_shader) {
key->vs.as_es = 1;
- key->vs.gs_used_inputs = sctx->gs_shader->gs_used_inputs;
+ key->vs.es_enabled_outputs = sctx->gs_shader->inputs_read;
}
+
+ if (!sctx->gs_shader && sctx->ps_shader &&
+ sctx->ps_shader->info.uses_primid)
+ key->vs.export_prim_id = 1;
+ break;
+ case PIPE_SHADER_TESS_CTRL:
+ key->tcs.prim_mode =
+ sctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
+ break;
+ case PIPE_SHADER_TESS_EVAL:
+ if (sctx->gs_shader) {
+ key->tes.as_es = 1;
+ key->tes.es_enabled_outputs = sctx->gs_shader->inputs_read;
+ } else if (sctx->ps_shader && sctx->ps_shader->info.uses_primid)
+ key->tes.export_prim_id = 1;
break;
case PIPE_SHADER_GEOMETRY:
break;
@@ -468,6 +651,7 @@ static int si_shader_select(struct pipe_context *ctx,
}
si_shader_init_pm4_state(shader);
sel->num_shaders++;
+ p_atomic_inc(&sctx->screen->b.num_compilations);
}
return 0;
@@ -485,6 +669,7 @@ static void *si_create_shader_state(struct pipe_context *ctx,
sel->tokens = tgsi_dup_tokens(state->tokens);
sel->so = state->stream_output;
tgsi_scan_shader(state->tokens, &sel->info);
+ p_atomic_inc(&sscreen->b.num_shaders_created);
switch (pipe_shader_type) {
case PIPE_SHADER_GEOMETRY:
@@ -492,6 +677,8 @@ static void *si_create_shader_state(struct pipe_context *ctx,
sel->info.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM];
sel->gs_max_out_vertices =
sel->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
+ sel->gs_num_invocations =
+ sel->info.properties[TGSI_PROPERTY_GS_INVOCATIONS];
for (i = 0; i < sel->info.num_inputs; i++) {
unsigned name = sel->info.input_semantic_name[i];
@@ -501,10 +688,31 @@ static void *si_create_shader_state(struct pipe_context *ctx,
case TGSI_SEMANTIC_PRIMID:
break;
default:
- sel->gs_used_inputs |=
+ sel->inputs_read |=
1llu << si_shader_io_get_unique_index(name, index);
}
}
+ break;
+
+ case PIPE_SHADER_VERTEX:
+ case PIPE_SHADER_TESS_CTRL:
+ for (i = 0; i < sel->info.num_outputs; i++) {
+ unsigned name = sel->info.output_semantic_name[i];
+ unsigned index = sel->info.output_semantic_index[i];
+
+ switch (name) {
+ case TGSI_SEMANTIC_TESSINNER:
+ case TGSI_SEMANTIC_TESSOUTER:
+ case TGSI_SEMANTIC_PATCH:
+ sel->patch_outputs_written |=
+ 1llu << si_shader_io_get_unique_index(name, index);
+ break;
+ default:
+ sel->outputs_written |=
+ 1llu << si_shader_io_get_unique_index(name, index);
+ }
+ }
+ break;
}
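
The masks built in the switch above pack each output semantic into a fixed bit position, with the tessellation levels and per-patch outputs kept in a separate mask. A self-contained sketch of the idea; the index function here is only a stand-in for si_shader_io_get_unique_index:

#include <stdio.h>
#include <stdint.h>

enum sem { SEM_POSITION, SEM_PATCH, SEM_TESSINNER, SEM_TESSOUTER, SEM_GENERIC };

static unsigned unique_index(enum sem name, unsigned index)
{
	return (unsigned)name * 8 + index;	/* stand-in mapping only */
}

int main(void)
{
	struct { enum sem name; unsigned index; } outs[] = {
		{ SEM_POSITION, 0 }, { SEM_GENERIC, 1 }, { SEM_TESSOUTER, 0 },
	};
	uint64_t outputs_written = 0, patch_outputs_written = 0;

	for (unsigned i = 0; i < sizeof(outs) / sizeof(outs[0]); i++) {
		uint64_t bit = 1llu << unique_index(outs[i].name, outs[i].index);

		if (outs[i].name == SEM_TESSINNER ||
		    outs[i].name == SEM_TESSOUTER ||
		    outs[i].name == SEM_PATCH)
			patch_outputs_written |= bit;
		else
			outputs_written |= bit;
	}
	printf("outputs_written=0x%llx patch_outputs_written=0x%llx\n",
	       (unsigned long long)outputs_written,
	       (unsigned long long)patch_outputs_written);
	return 0;
}
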
if (sscreen->b.debug_flags & DBG_PRECOMPILE)
@@ -531,6 +739,18 @@ static void *si_create_vs_state(struct pipe_context *ctx,
return si_create_shader_state(ctx, state, PIPE_SHADER_VERTEX);
}
+static void *si_create_tcs_state(struct pipe_context *ctx,
+ const struct pipe_shader_state *state)
+{
+ return si_create_shader_state(ctx, state, PIPE_SHADER_TESS_CTRL);
+}
+
+static void *si_create_tes_state(struct pipe_context *ctx,
+ const struct pipe_shader_state *state)
+{
+ return si_create_shader_state(ctx, state, PIPE_SHADER_TESS_EVAL);
+}
+
static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
@@ -540,20 +760,58 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
return;
sctx->vs_shader = sel;
- sctx->clip_regs.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->clip_regs);
}
static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_shader_selector *sel = state;
+ bool enable_changed = !!sctx->gs_shader != !!sel;
if (sctx->gs_shader == sel)
return;
sctx->gs_shader = sel;
- sctx->clip_regs.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->clip_regs);
sctx->last_rast_prim = -1; /* reset this so that it gets updated */
+
+ if (enable_changed)
+ si_shader_change_notify(sctx);
+}
+
+static void si_bind_tcs_shader(struct pipe_context *ctx, void *state)
+{
+ struct si_context *sctx = (struct si_context *)ctx;
+ struct si_shader_selector *sel = state;
+ bool enable_changed = !!sctx->tcs_shader != !!sel;
+
+ if (sctx->tcs_shader == sel)
+ return;
+
+ sctx->tcs_shader = sel;
+
+ if (enable_changed)
+ sctx->last_tcs = NULL; /* invalidate derived tess state */
+}
+
+static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
+{
+ struct si_context *sctx = (struct si_context *)ctx;
+ struct si_shader_selector *sel = state;
+ bool enable_changed = !!sctx->tes_shader != !!sel;
+
+ if (sctx->tes_shader == sel)
+ return;
+
+ sctx->tes_shader = sel;
+ si_mark_atom_dirty(sctx, &sctx->clip_regs);
+ sctx->last_rast_prim = -1; /* reset this so that it gets updated */
+
+ if (enable_changed) {
+ si_shader_change_notify(sctx);
+ sctx->last_tes_sh_base = -1; /* invalidate derived tess state */
+ }
}
static void si_make_dummy_ps(struct si_context *sctx)
@@ -594,7 +852,18 @@ static void si_delete_shader_selector(struct pipe_context *ctx,
c = p->next_variant;
switch (sel->type) {
case PIPE_SHADER_VERTEX:
- if (p->key.vs.as_es)
+ if (p->key.vs.as_ls)
+ si_pm4_delete_state(sctx, ls, p->pm4);
+ else if (p->key.vs.as_es)
+ si_pm4_delete_state(sctx, es, p->pm4);
+ else
+ si_pm4_delete_state(sctx, vs, p->pm4);
+ break;
+ case PIPE_SHADER_TESS_CTRL:
+ si_pm4_delete_state(sctx, hs, p->pm4);
+ break;
+ case PIPE_SHADER_TESS_EVAL:
+ if (p->key.tes.as_es)
si_pm4_delete_state(sctx, es, p->pm4);
else
si_pm4_delete_state(sctx, vs, p->pm4);
@@ -653,6 +922,30 @@ static void si_delete_ps_shader(struct pipe_context *ctx, void *state)
si_delete_shader_selector(ctx, sel);
}
+static void si_delete_tcs_shader(struct pipe_context *ctx, void *state)
+{
+ struct si_context *sctx = (struct si_context *)ctx;
+ struct si_shader_selector *sel = (struct si_shader_selector *)state;
+
+ if (sctx->tcs_shader == sel) {
+ sctx->tcs_shader = NULL;
+ }
+
+ si_delete_shader_selector(ctx, sel);
+}
+
+static void si_delete_tes_shader(struct pipe_context *ctx, void *state)
+{
+ struct si_context *sctx = (struct si_context *)ctx;
+ struct si_shader_selector *sel = (struct si_shader_selector *)state;
+
+ if (sctx->tes_shader == sel) {
+ sctx->tes_shader = NULL;
+ }
+
+ si_delete_shader_selector(ctx, sel);
+}
+
static void si_update_spi_map(struct si_context *sctx)
{
struct si_shader *ps = sctx->ps_shader->current;
@@ -694,7 +987,10 @@ bcolor:
}
}
- if (j == vsinfo->num_outputs && !G_028644_PT_SPRITE_TEX(tmp)) {
+ if (name == TGSI_SEMANTIC_PRIMID)
+ /* PrimID is written after the last output. */
+ tmp |= S_028644_OFFSET(vs->vs_output_param_offset[vsinfo->num_outputs]);
+ else if (j == vsinfo->num_outputs && !G_028644_PT_SPRITE_TEX(tmp)) {
/* No corresponding output found, load defaults into input.
* Don't set any other bits.
* (FLAT_SHADE=1 completely changes behavior) */
@@ -720,7 +1016,7 @@ bcolor:
static void si_init_gs_rings(struct si_context *sctx)
{
unsigned esgs_ring_size = 128 * 1024;
- unsigned gsvs_ring_size = 64 * 1024 * 1024;
+ unsigned gsvs_ring_size = 60 * 1024 * 1024;
assert(!sctx->gs_rings);
sctx->gs_rings = CALLOC_STRUCT(si_pm4_state);
@@ -732,6 +1028,12 @@ static void si_init_gs_rings(struct si_context *sctx)
PIPE_USAGE_DEFAULT, gsvs_ring_size);
if (sctx->b.chip_class >= CIK) {
+ if (sctx->b.chip_class >= VI) {
+ /* The maximum sizes are 63.999 MB on VI, because
+ * the register fields only have 18 bits. */
+ assert(esgs_ring_size / 256 < (1 << 18));
+ assert(gsvs_ring_size / 256 < (1 << 18));
+ }
si_pm4_set_reg(sctx->gs_rings, R_030900_VGT_ESGS_RING_SIZE,
esgs_ring_size / 256);
si_pm4_set_reg(sctx->gs_rings, R_030904_VGT_GSVS_RING_SIZE,
@@ -745,15 +1047,42 @@ static void si_init_gs_rings(struct si_context *sctx)
si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_VERTEX, SI_RING_ESGS,
sctx->esgs_ring, 0, esgs_ring_size,
- true, true, 4, 64);
+ true, true, 4, 64, 0);
si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_ESGS,
sctx->esgs_ring, 0, esgs_ring_size,
- false, false, 0, 0);
+ false, false, 0, 0, 0);
si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_VERTEX, SI_RING_GSVS,
sctx->gsvs_ring, 0, gsvs_ring_size,
- false, false, 0, 0);
+ false, false, 0, 0, 0);
}
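
The VI assertion in si_init_gs_rings above guards an encoding limit: VGT_ESGS_RING_SIZE and VGT_GSVS_RING_SIZE take the size in 256-byte units in an 18-bit field, so the largest representable ring is (2^18 - 1) * 256 bytes. A quick standalone check of that ceiling:

#include <stdio.h>

int main(void)
{
	unsigned max_units = (1u << 18) - 1;	/* 18-bit SIZE field */
	unsigned long long max_bytes = (unsigned long long)max_units * 256;

	printf("max ring size on VI: %llu bytes (%.5f MB)\n",
	       max_bytes, max_bytes / (1024.0 * 1024.0));
	return 0;
}
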
+static void si_update_gs_rings(struct si_context *sctx)
+{
+ unsigned gs_vert_itemsize = sctx->gs_shader->info.num_outputs * 16;
+ unsigned gs_max_vert_out = sctx->gs_shader->gs_max_out_vertices;
+ unsigned gsvs_itemsize = gs_vert_itemsize * gs_max_vert_out;
+ uint64_t offset;
+
+ si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_GSVS,
+ sctx->gsvs_ring, gsvs_itemsize,
+ 64, true, true, 4, 16, 0);
+
+ offset = gsvs_itemsize * 64;
+ si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_GSVS_1,
+ sctx->gsvs_ring, gsvs_itemsize,
+ 64, true, true, 4, 16, offset);
+
+ offset = (gsvs_itemsize * 2) * 64;
+ si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_GSVS_2,
+ sctx->gsvs_ring, gsvs_itemsize,
+ 64, true, true, 4, 16, offset);
+
+ offset = (gsvs_itemsize * 3) * 64;
+ si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_GSVS_3,
+ sctx->gsvs_ring, gsvs_itemsize,
+ 64, true, true, 4, 16, offset);
+}
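
si_update_gs_rings above carves the GSVS ring into four per-stream windows of 64 items each, advancing the base offset by gsvs_itemsize * 64 per stream, where gsvs_itemsize is the per-vertex size (num_outputs * 16 bytes) times the declared maximum vertex count. A standalone sketch of the arithmetic (the shader sizes are illustrative):

#include <stdio.h>

int main(void)
{
	unsigned num_outputs = 4, gs_max_vert_out = 6;	/* illustrative */
	unsigned gs_vert_itemsize = num_outputs * 16;
	unsigned gsvs_itemsize = gs_vert_itemsize * gs_max_vert_out;

	for (unsigned stream = 0; stream < 4; stream++)
		printf("stream %u: itemsize=%u bytes, ring offset=%llu\n",
		       stream, gsvs_itemsize,
		       (unsigned long long)gsvs_itemsize * 64 * stream);
	return 0;
}
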
/**
* @returns 1 if \p sel has been updated to use a new scratch buffer and 0
* otherwise.
@@ -763,7 +1092,6 @@ static unsigned si_update_scratch_buffer(struct si_context *sctx,
{
struct si_shader *shader;
uint64_t scratch_va = sctx->scratch_buffer->gpu_address;
- unsigned char *ptr;
if (!sel)
return 0;
@@ -784,12 +1112,7 @@ static unsigned si_update_scratch_buffer(struct si_context *sctx,
si_shader_apply_scratch_relocs(sctx, shader, scratch_va);
/* Replace the shader bo with a new bo that has the relocs applied. */
- r600_resource_reference(&shader->bo, NULL);
- shader->bo = si_resource_create_custom(&sctx->screen->b.b, PIPE_USAGE_IMMUTABLE,
- shader->binary.code_size);
- ptr = sctx->screen->b.ws->buffer_map(shader->bo->cs_buf, NULL, PIPE_TRANSFER_WRITE);
- util_memcpy_cpu_to_le32(ptr, shader->binary.code, shader->binary.code_size);
- sctx->screen->b.ws->buffer_unmap(shader->bo->cs_buf);
+ si_shader_binary_upload(sctx->screen, shader);
/* Update the shader state to use the new shader bo. */
si_shader_init_pm4_state(shader);
@@ -818,10 +1141,14 @@ static unsigned si_get_scratch_buffer_bytes_per_wave(struct si_context *sctx,
static unsigned si_get_max_scratch_bytes_per_wave(struct si_context *sctx)
{
-
- return MAX3(si_get_scratch_buffer_bytes_per_wave(sctx, sctx->ps_shader),
- si_get_scratch_buffer_bytes_per_wave(sctx, sctx->gs_shader),
- si_get_scratch_buffer_bytes_per_wave(sctx, sctx->vs_shader));
+ unsigned bytes = 0;
+
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx, sctx->ps_shader));
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx, sctx->gs_shader));
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx, sctx->vs_shader));
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx, sctx->tcs_shader));
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx, sctx->tes_shader));
+ return bytes;
}
static void si_update_spi_tmpring_size(struct si_context *sctx)
@@ -855,15 +1182,29 @@ static void si_update_spi_tmpring_size(struct si_context *sctx)
si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4);
if (si_update_scratch_buffer(sctx, sctx->gs_shader))
si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
+ if (si_update_scratch_buffer(sctx, sctx->tcs_shader))
+ si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4);
- /* VS can be bound as ES or VS. */
- if (sctx->gs_shader) {
+ /* VS can be bound as LS, ES, or VS. */
+ if (sctx->tes_shader) {
+ if (si_update_scratch_buffer(sctx, sctx->vs_shader))
+ si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4);
+ } else if (sctx->gs_shader) {
if (si_update_scratch_buffer(sctx, sctx->vs_shader))
si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
} else {
if (si_update_scratch_buffer(sctx, sctx->vs_shader))
si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
}
+
+ /* TES can be bound as ES or VS. */
+ if (sctx->gs_shader) {
+ if (si_update_scratch_buffer(sctx, sctx->tes_shader))
+ si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4);
+ } else {
+ if (si_update_scratch_buffer(sctx, sctx->tes_shader))
+ si_pm4_bind_state(sctx, vs, sctx->tes_shader->current->pm4);
+ }
}
/* The LLVM shader backend should be reporting aligned scratch_sizes. */
@@ -874,60 +1215,187 @@ static void si_update_spi_tmpring_size(struct si_context *sctx)
S_0286E8_WAVESIZE(scratch_bytes_per_wave >> 10);
}
+static void si_init_tess_factor_ring(struct si_context *sctx)
+{
+ assert(!sctx->tf_state);
+ sctx->tf_state = CALLOC_STRUCT(si_pm4_state);
+
+ sctx->tf_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
+ PIPE_USAGE_DEFAULT,
+ 32768 * sctx->screen->b.info.max_se);
+ sctx->b.clear_buffer(&sctx->b.b, sctx->tf_ring, 0,
+ sctx->tf_ring->width0, fui(0), false);
+ assert(((sctx->tf_ring->width0 / 4) & C_030938_SIZE) == 0);
+
+ if (sctx->b.chip_class >= CIK) {
+ si_pm4_set_reg(sctx->tf_state, R_030938_VGT_TF_RING_SIZE,
+ S_030938_SIZE(sctx->tf_ring->width0 / 4));
+ si_pm4_set_reg(sctx->tf_state, R_030940_VGT_TF_MEMORY_BASE,
+ r600_resource(sctx->tf_ring)->gpu_address >> 8);
+ } else {
+ si_pm4_set_reg(sctx->tf_state, R_008988_VGT_TF_RING_SIZE,
+ S_008988_SIZE(sctx->tf_ring->width0 / 4));
+ si_pm4_set_reg(sctx->tf_state, R_0089B8_VGT_TF_MEMORY_BASE,
+ r600_resource(sctx->tf_ring)->gpu_address >> 8);
+ }
+ si_pm4_add_bo(sctx->tf_state, r600_resource(sctx->tf_ring),
+ RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
+ si_pm4_bind_state(sctx, tf_ring, sctx->tf_state);
+
+ si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_TESS_CTRL,
+ SI_RING_TESS_FACTOR, sctx->tf_ring, 0,
+ sctx->tf_ring->width0, false, false, 0, 0, 0);
+
+ sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
+}
+
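
The tessellation-factor ring above is sized at 32 KB per shader engine and the SIZE field of VGT_TF_RING_SIZE is programmed in dwords (width0 / 4). A small sketch of that sizing, over assumed max_se values:

#include <stdio.h>

int main(void)
{
	for (unsigned max_se = 1; max_se <= 4; max_se <<= 1) {
		unsigned bytes = 32768 * max_se;

		printf("max_se=%u: tf_ring=%u bytes, SIZE field=%u dwords\n",
		       max_se, bytes, bytes / 4);
	}
	return 0;
}
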
+/**
+ * This is used when TCS is NULL in the VS->TCS->TES chain. In this case,
+ * VS passes its outputs to TES directly, so the fixed-function shader only
+ * has to write TESSOUTER and TESSINNER.
+ */
+static void si_generate_fixed_func_tcs(struct si_context *sctx)
+{
+ struct ureg_src const0, const1;
+ struct ureg_dst tessouter, tessinner;
+ struct ureg_program *ureg = ureg_create(TGSI_PROCESSOR_TESS_CTRL);
+
+ if (!ureg)
+ return; /* if we get here, we're screwed */
+
+ assert(!sctx->fixed_func_tcs_shader);
+
+ ureg_DECL_constant2D(ureg, 0, 1, SI_DRIVER_STATE_CONST_BUF);
+ const0 = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, 0),
+ SI_DRIVER_STATE_CONST_BUF);
+ const1 = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, 1),
+ SI_DRIVER_STATE_CONST_BUF);
+
+ tessouter = ureg_DECL_output(ureg, TGSI_SEMANTIC_TESSOUTER, 0);
+ tessinner = ureg_DECL_output(ureg, TGSI_SEMANTIC_TESSINNER, 0);
+
+ ureg_MOV(ureg, tessouter, const0);
+ ureg_MOV(ureg, tessinner, const1);
+ ureg_END(ureg);
+
+ sctx->fixed_func_tcs_shader =
+ ureg_create_shader_and_destroy(ureg, &sctx->b.b);
+ assert(sctx->fixed_func_tcs_shader);
+}
+
+static void si_update_vgt_shader_config(struct si_context *sctx)
+{
+ /* Calculate the index of the config.
+ * 0 = VS, 1 = VS+GS, 2 = VS+Tess, 3 = VS+Tess+GS */
+ unsigned index = 2*!!sctx->tes_shader + !!sctx->gs_shader;
+ struct si_pm4_state **pm4 = &sctx->vgt_shader_config[index];
+
+ if (!*pm4) {
+ uint32_t stages = 0;
+
+ *pm4 = CALLOC_STRUCT(si_pm4_state);
+
+ if (sctx->tes_shader) {
+ stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) |
+ S_028B54_HS_EN(1);
+
+ if (sctx->gs_shader)
+ stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS) |
+ S_028B54_GS_EN(1) |
+ S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
+ else
+ stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS);
+ } else if (sctx->gs_shader) {
+ stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) |
+ S_028B54_GS_EN(1) |
+ S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
+ }
+
+ si_pm4_set_reg(*pm4, R_028B54_VGT_SHADER_STAGES_EN, stages);
+ }
+ si_pm4_bind_state(sctx, vgt_shader_config, *pm4);
+}
+
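
The config cache above keys on a 2-bit index, bit 1 for tessellation and bit 0 for GS, so each of the four pipeline shapes gets one cached VGT_SHADER_STAGES_EN state. A standalone sketch of the indexing:

#include <stdio.h>
#include <stdbool.h>

static unsigned vgt_config_index(bool has_tes, bool has_gs)
{
	return 2 * has_tes + has_gs;	/* 0=VS, 1=VS+GS, 2=VS+Tess, 3=VS+Tess+GS */
}

int main(void)
{
	static const char *names[] = { "VS", "VS+GS", "VS+Tess", "VS+Tess+GS" };

	for (int tes = 0; tes <= 1; tes++)
		for (int gs = 0; gs <= 1; gs++)
			printf("tes=%d gs=%d -> index %u (%s)\n", tes, gs,
			       vgt_config_index(tes, gs),
			       names[vgt_config_index(tes, gs)]);
	return 0;
}
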
+static void si_update_so(struct si_context *sctx, struct si_shader_selector *shader)
+{
+ struct pipe_stream_output_info *so = &shader->so;
+ uint32_t enabled_stream_buffers_mask = 0;
+ int i;
+
+ for (i = 0; i < so->num_outputs; i++)
+ enabled_stream_buffers_mask |= (1 << so->output[i].output_buffer) << (so->output[i].stream * 4);
+ sctx->b.streamout.enabled_stream_buffers_mask = enabled_stream_buffers_mask;
+ sctx->b.streamout.stride_in_dw = shader->so.stride;
+}
+
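
si_update_so above folds every streamout output into a single mask: one 4-bit group of buffer bits per stream, selected by stream * 4. A self-contained sketch with made-up outputs:

#include <stdio.h>
#include <stdint.h>

struct so_output { unsigned output_buffer, stream; };

int main(void)
{
	struct so_output outputs[] = { {0, 0}, {1, 0}, {0, 2} };	/* illustrative */
	uint32_t mask = 0;

	for (unsigned i = 0; i < sizeof(outputs) / sizeof(outputs[0]); i++)
		mask |= (1u << outputs[i].output_buffer)
			<< (outputs[i].stream * 4);
	printf("enabled_stream_buffers_mask = 0x%x\n", mask);	/* 0x103 */
	return 0;
}
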
void si_update_shaders(struct si_context *sctx)
{
struct pipe_context *ctx = (struct pipe_context*)sctx;
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
- if (sctx->gs_shader) {
- si_shader_select(ctx, sctx->gs_shader);
- si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
- si_pm4_bind_state(sctx, vs, sctx->gs_shader->current->gs_copy_shader->pm4);
+ /* Update stages before GS. */
+ if (sctx->tes_shader) {
+ if (!sctx->tf_state)
+ si_init_tess_factor_ring(sctx);
- sctx->b.streamout.stride_in_dw = sctx->gs_shader->so.stride;
+ /* VS as LS */
+ si_shader_select(ctx, sctx->vs_shader);
+ si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4);
+
+ if (sctx->tcs_shader) {
+ si_shader_select(ctx, sctx->tcs_shader);
+ si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4);
+ } else {
+ if (!sctx->fixed_func_tcs_shader)
+ si_generate_fixed_func_tcs(sctx);
+ si_shader_select(ctx, sctx->fixed_func_tcs_shader);
+ si_pm4_bind_state(sctx, hs,
+ sctx->fixed_func_tcs_shader->current->pm4);
+ }
+ si_shader_select(ctx, sctx->tes_shader);
+ if (sctx->gs_shader) {
+ /* TES as ES */
+ si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4);
+ } else {
+ /* TES as VS */
+ si_pm4_bind_state(sctx, vs, sctx->tes_shader->current->pm4);
+ si_update_so(sctx, sctx->tes_shader);
+ }
+ } else if (sctx->gs_shader) {
+ /* VS as ES */
si_shader_select(ctx, sctx->vs_shader);
si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
+ } else {
+ /* VS as VS */
+ si_shader_select(ctx, sctx->vs_shader);
+ si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
+ si_update_so(sctx, sctx->vs_shader);
+ }
+
+ /* Update GS. */
+ if (sctx->gs_shader) {
+ si_shader_select(ctx, sctx->gs_shader);
+ si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
+ si_pm4_bind_state(sctx, vs, sctx->gs_shader->current->gs_copy_shader->pm4);
+ si_update_so(sctx, sctx->gs_shader);
if (!sctx->gs_rings)
si_init_gs_rings(sctx);
+
if (sctx->emitted.named.gs_rings != sctx->gs_rings)
sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
si_pm4_bind_state(sctx, gs_rings, sctx->gs_rings);
- si_set_ring_buffer(ctx, PIPE_SHADER_GEOMETRY, SI_RING_GSVS,
- sctx->gsvs_ring,
- sctx->gs_shader->gs_max_out_vertices *
- sctx->gs_shader->info.num_outputs * 16,
- 64, true, true, 4, 16);
-
- if (!sctx->gs_on) {
- sctx->gs_on = CALLOC_STRUCT(si_pm4_state);
-
- si_pm4_set_reg(sctx->gs_on, R_028B54_VGT_SHADER_STAGES_EN,
- S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) |
- S_028B54_GS_EN(1) |
- S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER));
- }
- si_pm4_bind_state(sctx, gs_onoff, sctx->gs_on);
+ si_update_gs_rings(sctx);
} else {
- si_shader_select(ctx, sctx->vs_shader);
- si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
-
- sctx->b.streamout.stride_in_dw = sctx->vs_shader->so.stride;
-
- if (!sctx->gs_off) {
- sctx->gs_off = CALLOC_STRUCT(si_pm4_state);
-
- si_pm4_set_reg(sctx->gs_off, R_028A40_VGT_GS_MODE, 0);
- si_pm4_set_reg(sctx->gs_off, R_028B54_VGT_SHADER_STAGES_EN, 0);
- }
- si_pm4_bind_state(sctx, gs_onoff, sctx->gs_off);
si_pm4_bind_state(sctx, gs_rings, NULL);
si_pm4_bind_state(sctx, gs, NULL);
si_pm4_bind_state(sctx, es, NULL);
}
+ si_update_vgt_shader_config(sctx);
+
si_shader_select(ctx, sctx->ps_shader);
if (!sctx->ps_shader->current) {
@@ -957,29 +1425,35 @@ void si_update_shaders(struct si_context *sctx)
if (sctx->ps_db_shader_control != sctx->ps_shader->current->db_shader_control) {
sctx->ps_db_shader_control = sctx->ps_shader->current->db_shader_control;
- sctx->db_render_state.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
if (sctx->smoothing_enabled != sctx->ps_shader->current->key.ps.poly_line_smoothing) {
sctx->smoothing_enabled = sctx->ps_shader->current->key.ps.poly_line_smoothing;
- sctx->msaa_config.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->msaa_config);
if (sctx->b.chip_class == SI)
- sctx->db_render_state.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
}
void si_init_shader_functions(struct si_context *sctx)
{
sctx->b.b.create_vs_state = si_create_vs_state;
+ sctx->b.b.create_tcs_state = si_create_tcs_state;
+ sctx->b.b.create_tes_state = si_create_tes_state;
sctx->b.b.create_gs_state = si_create_gs_state;
sctx->b.b.create_fs_state = si_create_fs_state;
sctx->b.b.bind_vs_state = si_bind_vs_shader;
+ sctx->b.b.bind_tcs_state = si_bind_tcs_shader;
+ sctx->b.b.bind_tes_state = si_bind_tes_shader;
sctx->b.b.bind_gs_state = si_bind_gs_shader;
sctx->b.b.bind_fs_state = si_bind_ps_shader;
sctx->b.b.delete_vs_state = si_delete_vs_shader;
+ sctx->b.b.delete_tcs_state = si_delete_tcs_shader;
+ sctx->b.b.delete_tes_state = si_delete_tes_shader;
sctx->b.b.delete_gs_state = si_delete_gs_shader;
sctx->b.b.delete_fs_state = si_delete_ps_shader;
}
diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h
index 35d5ee232a0..66fdf35c8af 100644
--- a/src/gallium/drivers/radeonsi/sid.h
+++ b/src/gallium/drivers/radeonsi/sid.h
@@ -206,6 +206,398 @@
* 6. COMMAND [29:22] | BYTE_COUNT [20:0]
*/
+
+#define R_000E4C_SRBM_STATUS2 0x000E4C
+#define S_000E4C_SDMA_RQ_PENDING(x) (((x) & 0x1) << 0)
+#define G_000E4C_SDMA_RQ_PENDING(x) (((x) >> 0) & 0x1)
+#define C_000E4C_SDMA_RQ_PENDING 0xFFFFFFFE
+#define S_000E4C_TST_RQ_PENDING(x) (((x) & 0x1) << 1)
+#define G_000E4C_TST_RQ_PENDING(x) (((x) >> 1) & 0x1)
+#define C_000E4C_TST_RQ_PENDING 0xFFFFFFFD
+#define S_000E4C_SDMA1_RQ_PENDING(x) (((x) & 0x1) << 2)
+#define G_000E4C_SDMA1_RQ_PENDING(x) (((x) >> 2) & 0x1)
+#define C_000E4C_SDMA1_RQ_PENDING 0xFFFFFFFB
+#define S_000E4C_VCE0_RQ_PENDING(x) (((x) & 0x1) << 3)
+#define G_000E4C_VCE0_RQ_PENDING(x) (((x) >> 3) & 0x1)
+#define C_000E4C_VCE0_RQ_PENDING 0xFFFFFFF7
+#define S_000E4C_VP8_BUSY(x) (((x) & 0x1) << 4)
+#define G_000E4C_VP8_BUSY(x) (((x) >> 4) & 0x1)
+#define C_000E4C_VP8_BUSY 0xFFFFFFEF
+#define S_000E4C_SDMA_BUSY(x) (((x) & 0x1) << 5)
+#define G_000E4C_SDMA_BUSY(x) (((x) >> 5) & 0x1)
+#define C_000E4C_SDMA_BUSY 0xFFFFFFDF
+#define S_000E4C_SDMA1_BUSY(x) (((x) & 0x1) << 6)
+#define G_000E4C_SDMA1_BUSY(x) (((x) >> 6) & 0x1)
+#define C_000E4C_SDMA1_BUSY 0xFFFFFFBF
+#define S_000E4C_VCE0_BUSY(x) (((x) & 0x1) << 7)
+#define G_000E4C_VCE0_BUSY(x) (((x) >> 7) & 0x1)
+#define C_000E4C_VCE0_BUSY 0xFFFFFF7F
+#define S_000E4C_XDMA_BUSY(x) (((x) & 0x1) << 8)
+#define G_000E4C_XDMA_BUSY(x) (((x) >> 8) & 0x1)
+#define C_000E4C_XDMA_BUSY 0xFFFFFEFF
+#define S_000E4C_CHUB_BUSY(x) (((x) & 0x1) << 9)
+#define G_000E4C_CHUB_BUSY(x) (((x) >> 9) & 0x1)
+#define C_000E4C_CHUB_BUSY 0xFFFFFDFF
+#define S_000E4C_SDMA2_BUSY(x) (((x) & 0x1) << 10)
+#define G_000E4C_SDMA2_BUSY(x) (((x) >> 10) & 0x1)
+#define C_000E4C_SDMA2_BUSY 0xFFFFFBFF
+#define S_000E4C_SDMA3_BUSY(x) (((x) & 0x1) << 11)
+#define G_000E4C_SDMA3_BUSY(x) (((x) >> 11) & 0x1)
+#define C_000E4C_SDMA3_BUSY 0xFFFFF7FF
+#define S_000E4C_SAMSCP_BUSY(x) (((x) & 0x1) << 12)
+#define G_000E4C_SAMSCP_BUSY(x) (((x) >> 12) & 0x1)
+#define C_000E4C_SAMSCP_BUSY 0xFFFFEFFF
+#define S_000E4C_ISP_BUSY(x) (((x) & 0x1) << 13)
+#define G_000E4C_ISP_BUSY(x) (((x) >> 13) & 0x1)
+#define C_000E4C_ISP_BUSY 0xFFFFDFFF
+#define S_000E4C_VCE1_BUSY(x) (((x) & 0x1) << 14)
+#define G_000E4C_VCE1_BUSY(x) (((x) >> 14) & 0x1)
+#define C_000E4C_VCE1_BUSY 0xFFFFBFFF
+#define S_000E4C_ODE_BUSY(x) (((x) & 0x1) << 15)
+#define G_000E4C_ODE_BUSY(x) (((x) >> 15) & 0x1)
+#define C_000E4C_ODE_BUSY 0xFFFF7FFF
+#define S_000E4C_SDMA2_RQ_PENDING(x) (((x) & 0x1) << 16)
+#define G_000E4C_SDMA2_RQ_PENDING(x) (((x) >> 16) & 0x1)
+#define C_000E4C_SDMA2_RQ_PENDING 0xFFFEFFFF
+#define S_000E4C_SDMA3_RQ_PENDING(x) (((x) & 0x1) << 17)
+#define G_000E4C_SDMA3_RQ_PENDING(x) (((x) >> 17) & 0x1)
+#define C_000E4C_SDMA3_RQ_PENDING 0xFFFDFFFF
+#define S_000E4C_SAMSCP_RQ_PENDING(x) (((x) & 0x1) << 18)
+#define G_000E4C_SAMSCP_RQ_PENDING(x) (((x) >> 18) & 0x1)
+#define C_000E4C_SAMSCP_RQ_PENDING 0xFFFBFFFF
+#define S_000E4C_ISP_RQ_PENDING(x) (((x) & 0x1) << 19)
+#define G_000E4C_ISP_RQ_PENDING(x) (((x) >> 19) & 0x1)
+#define C_000E4C_ISP_RQ_PENDING 0xFFF7FFFF
+#define S_000E4C_VCE1_RQ_PENDING(x) (((x) & 0x1) << 20)
+#define G_000E4C_VCE1_RQ_PENDING(x) (((x) >> 20) & 0x1)
+#define C_000E4C_VCE1_RQ_PENDING 0xFFEFFFFF
+#define R_000E50_SRBM_STATUS 0x000E50
+#define S_000E50_UVD_RQ_PENDING(x) (((x) & 0x1) << 1)
+#define G_000E50_UVD_RQ_PENDING(x) (((x) >> 1) & 0x1)
+#define C_000E50_UVD_RQ_PENDING 0xFFFFFFFD
+#define S_000E50_SAMMSP_RQ_PENDING(x) (((x) & 0x1) << 2)
+#define G_000E50_SAMMSP_RQ_PENDING(x) (((x) >> 2) & 0x1)
+#define C_000E50_SAMMSP_RQ_PENDING 0xFFFFFFFB
+#define S_000E50_ACP_RQ_PENDING(x) (((x) & 0x1) << 3)
+#define G_000E50_ACP_RQ_PENDING(x) (((x) >> 3) & 0x1)
+#define C_000E50_ACP_RQ_PENDING 0xFFFFFFF7
+#define S_000E50_SMU_RQ_PENDING(x) (((x) & 0x1) << 4)
+#define G_000E50_SMU_RQ_PENDING(x) (((x) >> 4) & 0x1)
+#define C_000E50_SMU_RQ_PENDING 0xFFFFFFEF
+#define S_000E50_GRBM_RQ_PENDING(x) (((x) & 0x1) << 5)
+#define G_000E50_GRBM_RQ_PENDING(x) (((x) >> 5) & 0x1)
+#define C_000E50_GRBM_RQ_PENDING 0xFFFFFFDF
+#define S_000E50_HI_RQ_PENDING(x) (((x) & 0x1) << 6)
+#define G_000E50_HI_RQ_PENDING(x) (((x) >> 6) & 0x1)
+#define C_000E50_HI_RQ_PENDING 0xFFFFFFBF
+#define S_000E50_VMC_BUSY(x) (((x) & 0x1) << 8)
+#define G_000E50_VMC_BUSY(x) (((x) >> 8) & 0x1)
+#define C_000E50_VMC_BUSY 0xFFFFFEFF
+#define S_000E50_MCB_BUSY(x) (((x) & 0x1) << 9)
+#define G_000E50_MCB_BUSY(x) (((x) >> 9) & 0x1)
+#define C_000E50_MCB_BUSY 0xFFFFFDFF
+#define S_000E50_MCB_NON_DISPLAY_BUSY(x) (((x) & 0x1) << 10)
+#define G_000E50_MCB_NON_DISPLAY_BUSY(x) (((x) >> 10) & 0x1)
+#define C_000E50_MCB_NON_DISPLAY_BUSY 0xFFFFFBFF
+#define S_000E50_MCC_BUSY(x) (((x) & 0x1) << 11)
+#define G_000E50_MCC_BUSY(x) (((x) >> 11) & 0x1)
+#define C_000E50_MCC_BUSY 0xFFFFF7FF
+#define S_000E50_MCD_BUSY(x) (((x) & 0x1) << 12)
+#define G_000E50_MCD_BUSY(x) (((x) >> 12) & 0x1)
+#define C_000E50_MCD_BUSY 0xFFFFEFFF
+#define S_000E50_VMC1_BUSY(x) (((x) & 0x1) << 13)
+#define G_000E50_VMC1_BUSY(x) (((x) >> 13) & 0x1)
+#define C_000E50_VMC1_BUSY 0xFFFFDFFF
+#define S_000E50_SEM_BUSY(x) (((x) & 0x1) << 14)
+#define G_000E50_SEM_BUSY(x) (((x) >> 14) & 0x1)
+#define C_000E50_SEM_BUSY 0xFFFFBFFF
+#define S_000E50_ACP_BUSY(x) (((x) & 0x1) << 16)
+#define G_000E50_ACP_BUSY(x) (((x) >> 16) & 0x1)
+#define C_000E50_ACP_BUSY 0xFFFEFFFF
+#define S_000E50_IH_BUSY(x) (((x) & 0x1) << 17)
+#define G_000E50_IH_BUSY(x) (((x) >> 17) & 0x1)
+#define C_000E50_IH_BUSY 0xFFFDFFFF
+#define S_000E50_UVD_BUSY(x) (((x) & 0x1) << 19)
+#define G_000E50_UVD_BUSY(x) (((x) >> 19) & 0x1)
+#define C_000E50_UVD_BUSY 0xFFF7FFFF
+#define S_000E50_SAMMSP_BUSY(x) (((x) & 0x1) << 20)
+#define G_000E50_SAMMSP_BUSY(x) (((x) >> 20) & 0x1)
+#define C_000E50_SAMMSP_BUSY 0xFFEFFFFF
+#define S_000E50_GCATCL2_BUSY(x) (((x) & 0x1) << 21)
+#define G_000E50_GCATCL2_BUSY(x) (((x) >> 21) & 0x1)
+#define C_000E50_GCATCL2_BUSY 0xFFDFFFFF
+#define S_000E50_OSATCL2_BUSY(x) (((x) & 0x1) << 22)
+#define G_000E50_OSATCL2_BUSY(x) (((x) >> 22) & 0x1)
+#define C_000E50_OSATCL2_BUSY 0xFFBFFFFF
+#define S_000E50_BIF_BUSY(x) (((x) & 0x1) << 29)
+#define G_000E50_BIF_BUSY(x) (((x) >> 29) & 0x1)
+#define C_000E50_BIF_BUSY 0xDFFFFFFF
+#define R_000E54_SRBM_STATUS3 0x000E54
+#define S_000E54_MCC0_BUSY(x) (((x) & 0x1) << 0)
+#define G_000E54_MCC0_BUSY(x) (((x) >> 0) & 0x1)
+#define C_000E54_MCC0_BUSY 0xFFFFFFFE
+#define S_000E54_MCC1_BUSY(x) (((x) & 0x1) << 1)
+#define G_000E54_MCC1_BUSY(x) (((x) >> 1) & 0x1)
+#define C_000E54_MCC1_BUSY 0xFFFFFFFD
+#define S_000E54_MCC2_BUSY(x) (((x) & 0x1) << 2)
+#define G_000E54_MCC2_BUSY(x) (((x) >> 2) & 0x1)
+#define C_000E54_MCC2_BUSY 0xFFFFFFFB
+#define S_000E54_MCC3_BUSY(x) (((x) & 0x1) << 3)
+#define G_000E54_MCC3_BUSY(x) (((x) >> 3) & 0x1)
+#define C_000E54_MCC3_BUSY 0xFFFFFFF7
+#define S_000E54_MCC4_BUSY(x) (((x) & 0x1) << 4)
+#define G_000E54_MCC4_BUSY(x) (((x) >> 4) & 0x1)
+#define C_000E54_MCC4_BUSY 0xFFFFFFEF
+#define S_000E54_MCC5_BUSY(x) (((x) & 0x1) << 5)
+#define G_000E54_MCC5_BUSY(x) (((x) >> 5) & 0x1)
+#define C_000E54_MCC5_BUSY 0xFFFFFFDF
+#define S_000E54_MCC6_BUSY(x) (((x) & 0x1) << 6)
+#define G_000E54_MCC6_BUSY(x) (((x) >> 6) & 0x1)
+#define C_000E54_MCC6_BUSY 0xFFFFFFBF
+#define S_000E54_MCC7_BUSY(x) (((x) & 0x1) << 7)
+#define G_000E54_MCC7_BUSY(x) (((x) >> 7) & 0x1)
+#define C_000E54_MCC7_BUSY 0xFFFFFF7F
+#define S_000E54_MCD0_BUSY(x) (((x) & 0x1) << 8)
+#define G_000E54_MCD0_BUSY(x) (((x) >> 8) & 0x1)
+#define C_000E54_MCD0_BUSY 0xFFFFFEFF
+#define S_000E54_MCD1_BUSY(x) (((x) & 0x1) << 9)
+#define G_000E54_MCD1_BUSY(x) (((x) >> 9) & 0x1)
+#define C_000E54_MCD1_BUSY 0xFFFFFDFF
+#define S_000E54_MCD2_BUSY(x) (((x) & 0x1) << 10)
+#define G_000E54_MCD2_BUSY(x) (((x) >> 10) & 0x1)
+#define C_000E54_MCD2_BUSY 0xFFFFFBFF
+#define S_000E54_MCD3_BUSY(x) (((x) & 0x1) << 11)
+#define G_000E54_MCD3_BUSY(x) (((x) >> 11) & 0x1)
+#define C_000E54_MCD3_BUSY 0xFFFFF7FF
+#define S_000E54_MCD4_BUSY(x) (((x) & 0x1) << 12)
+#define G_000E54_MCD4_BUSY(x) (((x) >> 12) & 0x1)
+#define C_000E54_MCD4_BUSY 0xFFFFEFFF
+#define S_000E54_MCD5_BUSY(x) (((x) & 0x1) << 13)
+#define G_000E54_MCD5_BUSY(x) (((x) >> 13) & 0x1)
+#define C_000E54_MCD5_BUSY 0xFFFFDFFF
+#define S_000E54_MCD6_BUSY(x) (((x) & 0x1) << 14)
+#define G_000E54_MCD6_BUSY(x) (((x) >> 14) & 0x1)
+#define C_000E54_MCD6_BUSY 0xFFFFBFFF
+#define S_000E54_MCD7_BUSY(x) (((x) & 0x1) << 15)
+#define G_000E54_MCD7_BUSY(x) (((x) >> 15) & 0x1)
+#define C_000E54_MCD7_BUSY 0xFFFF7FFF
+#define R_00D034_SDMA0_STATUS_REG 0x00D034
+#define S_00D034_IDLE(x) (((x) & 0x1) << 0)
+#define G_00D034_IDLE(x) (((x) >> 0) & 0x1)
+#define C_00D034_IDLE 0xFFFFFFFE
+#define S_00D034_REG_IDLE(x) (((x) & 0x1) << 1)
+#define G_00D034_REG_IDLE(x) (((x) >> 1) & 0x1)
+#define C_00D034_REG_IDLE 0xFFFFFFFD
+#define S_00D034_RB_EMPTY(x) (((x) & 0x1) << 2)
+#define G_00D034_RB_EMPTY(x) (((x) >> 2) & 0x1)
+#define C_00D034_RB_EMPTY 0xFFFFFFFB
+#define S_00D034_RB_FULL(x) (((x) & 0x1) << 3)
+#define G_00D034_RB_FULL(x) (((x) >> 3) & 0x1)
+#define C_00D034_RB_FULL 0xFFFFFFF7
+#define S_00D034_RB_CMD_IDLE(x) (((x) & 0x1) << 4)
+#define G_00D034_RB_CMD_IDLE(x) (((x) >> 4) & 0x1)
+#define C_00D034_RB_CMD_IDLE 0xFFFFFFEF
+#define S_00D034_RB_CMD_FULL(x) (((x) & 0x1) << 5)
+#define G_00D034_RB_CMD_FULL(x) (((x) >> 5) & 0x1)
+#define C_00D034_RB_CMD_FULL 0xFFFFFFDF
+#define S_00D034_IB_CMD_IDLE(x) (((x) & 0x1) << 6)
+#define G_00D034_IB_CMD_IDLE(x) (((x) >> 6) & 0x1)
+#define C_00D034_IB_CMD_IDLE 0xFFFFFFBF
+#define S_00D034_IB_CMD_FULL(x) (((x) & 0x1) << 7)
+#define G_00D034_IB_CMD_FULL(x) (((x) >> 7) & 0x1)
+#define C_00D034_IB_CMD_FULL 0xFFFFFF7F
+#define S_00D034_BLOCK_IDLE(x) (((x) & 0x1) << 8)
+#define G_00D034_BLOCK_IDLE(x) (((x) >> 8) & 0x1)
+#define C_00D034_BLOCK_IDLE 0xFFFFFEFF
+#define S_00D034_INSIDE_IB(x) (((x) & 0x1) << 9)
+#define G_00D034_INSIDE_IB(x) (((x) >> 9) & 0x1)
+#define C_00D034_INSIDE_IB 0xFFFFFDFF
+#define S_00D034_EX_IDLE(x) (((x) & 0x1) << 10)
+#define G_00D034_EX_IDLE(x) (((x) >> 10) & 0x1)
+#define C_00D034_EX_IDLE 0xFFFFFBFF
+#define S_00D034_EX_IDLE_POLL_TIMER_EXPIRE(x) (((x) & 0x1) << 11)
+#define G_00D034_EX_IDLE_POLL_TIMER_EXPIRE(x) (((x) >> 11) & 0x1)
+#define C_00D034_EX_IDLE_POLL_TIMER_EXPIRE 0xFFFFF7FF
+#define S_00D034_PACKET_READY(x) (((x) & 0x1) << 12)
+#define G_00D034_PACKET_READY(x) (((x) >> 12) & 0x1)
+#define C_00D034_PACKET_READY 0xFFFFEFFF
+#define S_00D034_MC_WR_IDLE(x) (((x) & 0x1) << 13)
+#define G_00D034_MC_WR_IDLE(x) (((x) >> 13) & 0x1)
+#define C_00D034_MC_WR_IDLE 0xFFFFDFFF
+#define S_00D034_SRBM_IDLE(x) (((x) & 0x1) << 14)
+#define G_00D034_SRBM_IDLE(x) (((x) >> 14) & 0x1)
+#define C_00D034_SRBM_IDLE 0xFFFFBFFF
+#define S_00D034_CONTEXT_EMPTY(x) (((x) & 0x1) << 15)
+#define G_00D034_CONTEXT_EMPTY(x) (((x) >> 15) & 0x1)
+#define C_00D034_CONTEXT_EMPTY 0xFFFF7FFF
+#define S_00D034_DELTA_RPTR_FULL(x) (((x) & 0x1) << 16)
+#define G_00D034_DELTA_RPTR_FULL(x) (((x) >> 16) & 0x1)
+#define C_00D034_DELTA_RPTR_FULL 0xFFFEFFFF
+#define S_00D034_RB_MC_RREQ_IDLE(x) (((x) & 0x1) << 17)
+#define G_00D034_RB_MC_RREQ_IDLE(x) (((x) >> 17) & 0x1)
+#define C_00D034_RB_MC_RREQ_IDLE 0xFFFDFFFF
+#define S_00D034_IB_MC_RREQ_IDLE(x) (((x) & 0x1) << 18)
+#define G_00D034_IB_MC_RREQ_IDLE(x) (((x) >> 18) & 0x1)
+#define C_00D034_IB_MC_RREQ_IDLE 0xFFFBFFFF
+#define S_00D034_MC_RD_IDLE(x) (((x) & 0x1) << 19)
+#define G_00D034_MC_RD_IDLE(x) (((x) >> 19) & 0x1)
+#define C_00D034_MC_RD_IDLE 0xFFF7FFFF
+#define S_00D034_DELTA_RPTR_EMPTY(x) (((x) & 0x1) << 20)
+#define G_00D034_DELTA_RPTR_EMPTY(x) (((x) >> 20) & 0x1)
+#define C_00D034_DELTA_RPTR_EMPTY 0xFFEFFFFF
+#define S_00D034_MC_RD_RET_STALL(x) (((x) & 0x1) << 21)
+#define G_00D034_MC_RD_RET_STALL(x) (((x) >> 21) & 0x1)
+#define C_00D034_MC_RD_RET_STALL 0xFFDFFFFF
+#define S_00D034_MC_RD_NO_POLL_IDLE(x) (((x) & 0x1) << 22)
+#define G_00D034_MC_RD_NO_POLL_IDLE(x) (((x) >> 22) & 0x1)
+#define C_00D034_MC_RD_NO_POLL_IDLE 0xFFBFFFFF
+#define S_00D034_PREV_CMD_IDLE(x) (((x) & 0x1) << 25)
+#define G_00D034_PREV_CMD_IDLE(x) (((x) >> 25) & 0x1)
+#define C_00D034_PREV_CMD_IDLE 0xFDFFFFFF
+#define S_00D034_SEM_IDLE(x) (((x) & 0x1) << 26)
+#define G_00D034_SEM_IDLE(x) (((x) >> 26) & 0x1)
+#define C_00D034_SEM_IDLE 0xFBFFFFFF
+#define S_00D034_SEM_REQ_STALL(x) (((x) & 0x1) << 27)
+#define G_00D034_SEM_REQ_STALL(x) (((x) >> 27) & 0x1)
+#define C_00D034_SEM_REQ_STALL 0xF7FFFFFF
+#define S_00D034_SEM_RESP_STATE(x) (((x) & 0x03) << 28)
+#define G_00D034_SEM_RESP_STATE(x) (((x) >> 28) & 0x03)
+#define C_00D034_SEM_RESP_STATE 0xCFFFFFFF
+#define S_00D034_INT_IDLE(x) (((x) & 0x1) << 30)
+#define G_00D034_INT_IDLE(x) (((x) >> 30) & 0x1)
+#define C_00D034_INT_IDLE 0xBFFFFFFF
+#define S_00D034_INT_REQ_STALL(x) (((x) & 0x1) << 31)
+#define G_00D034_INT_REQ_STALL(x) (((x) >> 31) & 0x1)
+#define C_00D034_INT_REQ_STALL 0x7FFFFFFF
+#define R_00D834_SDMA1_STATUS_REG 0x00D834
+#define R_008008_GRBM_STATUS2 0x008008
+#define S_008008_ME0PIPE1_CMDFIFO_AVAIL(x) (((x) & 0x0F) << 0)
+#define G_008008_ME0PIPE1_CMDFIFO_AVAIL(x) (((x) >> 0) & 0x0F)
+#define C_008008_ME0PIPE1_CMDFIFO_AVAIL 0xFFFFFFF0
+#define S_008008_ME0PIPE1_CF_RQ_PENDING(x) (((x) & 0x1) << 4)
+#define G_008008_ME0PIPE1_CF_RQ_PENDING(x) (((x) >> 4) & 0x1)
+#define C_008008_ME0PIPE1_CF_RQ_PENDING 0xFFFFFFEF
+#define S_008008_ME0PIPE1_PF_RQ_PENDING(x) (((x) & 0x1) << 5)
+#define G_008008_ME0PIPE1_PF_RQ_PENDING(x) (((x) >> 5) & 0x1)
+#define C_008008_ME0PIPE1_PF_RQ_PENDING 0xFFFFFFDF
+#define S_008008_ME1PIPE0_RQ_PENDING(x) (((x) & 0x1) << 6)
+#define G_008008_ME1PIPE0_RQ_PENDING(x) (((x) >> 6) & 0x1)
+#define C_008008_ME1PIPE0_RQ_PENDING 0xFFFFFFBF
+#define S_008008_ME1PIPE1_RQ_PENDING(x) (((x) & 0x1) << 7)
+#define G_008008_ME1PIPE1_RQ_PENDING(x) (((x) >> 7) & 0x1)
+#define C_008008_ME1PIPE1_RQ_PENDING 0xFFFFFF7F
+#define S_008008_ME1PIPE2_RQ_PENDING(x) (((x) & 0x1) << 8)
+#define G_008008_ME1PIPE2_RQ_PENDING(x) (((x) >> 8) & 0x1)
+#define C_008008_ME1PIPE2_RQ_PENDING 0xFFFFFEFF
+#define S_008008_ME1PIPE3_RQ_PENDING(x) (((x) & 0x1) << 9)
+#define G_008008_ME1PIPE3_RQ_PENDING(x) (((x) >> 9) & 0x1)
+#define C_008008_ME1PIPE3_RQ_PENDING 0xFFFFFDFF
+#define S_008008_ME2PIPE0_RQ_PENDING(x) (((x) & 0x1) << 10)
+#define G_008008_ME2PIPE0_RQ_PENDING(x) (((x) >> 10) & 0x1)
+#define C_008008_ME2PIPE0_RQ_PENDING 0xFFFFFBFF
+#define S_008008_ME2PIPE1_RQ_PENDING(x) (((x) & 0x1) << 11)
+#define G_008008_ME2PIPE1_RQ_PENDING(x) (((x) >> 11) & 0x1)
+#define C_008008_ME2PIPE1_RQ_PENDING 0xFFFFF7FF
+#define S_008008_ME2PIPE2_RQ_PENDING(x) (((x) & 0x1) << 12)
+#define G_008008_ME2PIPE2_RQ_PENDING(x) (((x) >> 12) & 0x1)
+#define C_008008_ME2PIPE2_RQ_PENDING 0xFFFFEFFF
+#define S_008008_ME2PIPE3_RQ_PENDING(x) (((x) & 0x1) << 13)
+#define G_008008_ME2PIPE3_RQ_PENDING(x) (((x) >> 13) & 0x1)
+#define C_008008_ME2PIPE3_RQ_PENDING 0xFFFFDFFF
+#define S_008008_RLC_RQ_PENDING(x) (((x) & 0x1) << 14)
+#define G_008008_RLC_RQ_PENDING(x) (((x) >> 14) & 0x1)
+#define C_008008_RLC_RQ_PENDING 0xFFFFBFFF
+#define S_008008_RLC_BUSY(x) (((x) & 0x1) << 24)
+#define G_008008_RLC_BUSY(x) (((x) >> 24) & 0x1)
+#define C_008008_RLC_BUSY 0xFEFFFFFF
+#define S_008008_TC_BUSY(x) (((x) & 0x1) << 25)
+#define G_008008_TC_BUSY(x) (((x) >> 25) & 0x1)
+#define C_008008_TC_BUSY 0xFDFFFFFF
+#define S_008008_TCC_CC_RESIDENT(x) (((x) & 0x1) << 26)
+#define G_008008_TCC_CC_RESIDENT(x) (((x) >> 26) & 0x1)
+#define C_008008_TCC_CC_RESIDENT 0xFBFFFFFF
+#define S_008008_CPF_BUSY(x) (((x) & 0x1) << 28)
+#define G_008008_CPF_BUSY(x) (((x) >> 28) & 0x1)
+#define C_008008_CPF_BUSY 0xEFFFFFFF
+#define S_008008_CPC_BUSY(x) (((x) & 0x1) << 29)
+#define G_008008_CPC_BUSY(x) (((x) >> 29) & 0x1)
+#define C_008008_CPC_BUSY 0xDFFFFFFF
+#define S_008008_CPG_BUSY(x) (((x) & 0x1) << 30)
+#define G_008008_CPG_BUSY(x) (((x) >> 30) & 0x1)
+#define C_008008_CPG_BUSY 0xBFFFFFFF
+#define R_008010_GRBM_STATUS 0x008010
+#define S_008010_ME0PIPE0_CMDFIFO_AVAIL(x) (((x) & 0x0F) << 0)
+#define G_008010_ME0PIPE0_CMDFIFO_AVAIL(x) (((x) >> 0) & 0x0F)
+#define C_008010_ME0PIPE0_CMDFIFO_AVAIL 0xFFFFFFF0
+#define S_008010_SRBM_RQ_PENDING(x) (((x) & 0x1) << 5)
+#define G_008010_SRBM_RQ_PENDING(x) (((x) >> 5) & 0x1)
+#define C_008010_SRBM_RQ_PENDING 0xFFFFFFDF
+#define S_008010_ME0PIPE0_CF_RQ_PENDING(x) (((x) & 0x1) << 7)
+#define G_008010_ME0PIPE0_CF_RQ_PENDING(x) (((x) >> 7) & 0x1)
+#define C_008010_ME0PIPE0_CF_RQ_PENDING 0xFFFFFF7F
+#define S_008010_ME0PIPE0_PF_RQ_PENDING(x) (((x) & 0x1) << 8)
+#define G_008010_ME0PIPE0_PF_RQ_PENDING(x) (((x) >> 8) & 0x1)
+#define C_008010_ME0PIPE0_PF_RQ_PENDING 0xFFFFFEFF
+#define S_008010_GDS_DMA_RQ_PENDING(x) (((x) & 0x1) << 9)
+#define G_008010_GDS_DMA_RQ_PENDING(x) (((x) >> 9) & 0x1)
+#define C_008010_GDS_DMA_RQ_PENDING 0xFFFFFDFF
+#define S_008010_DB_CLEAN(x) (((x) & 0x1) << 12)
+#define G_008010_DB_CLEAN(x) (((x) >> 12) & 0x1)
+#define C_008010_DB_CLEAN 0xFFFFEFFF
+#define S_008010_CB_CLEAN(x) (((x) & 0x1) << 13)
+#define G_008010_CB_CLEAN(x) (((x) >> 13) & 0x1)
+#define C_008010_CB_CLEAN 0xFFFFDFFF
+#define S_008010_TA_BUSY(x) (((x) & 0x1) << 14)
+#define G_008010_TA_BUSY(x) (((x) >> 14) & 0x1)
+#define C_008010_TA_BUSY 0xFFFFBFFF
+#define S_008010_GDS_BUSY(x) (((x) & 0x1) << 15)
+#define G_008010_GDS_BUSY(x) (((x) >> 15) & 0x1)
+#define C_008010_GDS_BUSY 0xFFFF7FFF
+#define S_008010_WD_BUSY_NO_DMA(x) (((x) & 0x1) << 16)
+#define G_008010_WD_BUSY_NO_DMA(x) (((x) >> 16) & 0x1)
+#define C_008010_WD_BUSY_NO_DMA 0xFFFEFFFF
+#define S_008010_VGT_BUSY(x) (((x) & 0x1) << 17)
+#define G_008010_VGT_BUSY(x) (((x) >> 17) & 0x1)
+#define C_008010_VGT_BUSY 0xFFFDFFFF
+#define S_008010_IA_BUSY_NO_DMA(x) (((x) & 0x1) << 18)
+#define G_008010_IA_BUSY_NO_DMA(x) (((x) >> 18) & 0x1)
+#define C_008010_IA_BUSY_NO_DMA 0xFFFBFFFF
+#define S_008010_IA_BUSY(x) (((x) & 0x1) << 19)
+#define G_008010_IA_BUSY(x) (((x) >> 19) & 0x1)
+#define C_008010_IA_BUSY 0xFFF7FFFF
+#define S_008010_SX_BUSY(x) (((x) & 0x1) << 20)
+#define G_008010_SX_BUSY(x) (((x) >> 20) & 0x1)
+#define C_008010_SX_BUSY 0xFFEFFFFF
+#define S_008010_WD_BUSY(x) (((x) & 0x1) << 21)
+#define G_008010_WD_BUSY(x) (((x) >> 21) & 0x1)
+#define C_008010_WD_BUSY 0xFFDFFFFF
+#define S_008010_SPI_BUSY(x) (((x) & 0x1) << 22)
+#define G_008010_SPI_BUSY(x) (((x) >> 22) & 0x1)
+#define C_008010_SPI_BUSY 0xFFBFFFFF
+#define S_008010_BCI_BUSY(x) (((x) & 0x1) << 23)
+#define G_008010_BCI_BUSY(x) (((x) >> 23) & 0x1)
+#define C_008010_BCI_BUSY 0xFF7FFFFF
+#define S_008010_SC_BUSY(x) (((x) & 0x1) << 24)
+#define G_008010_SC_BUSY(x) (((x) >> 24) & 0x1)
+#define C_008010_SC_BUSY 0xFEFFFFFF
+#define S_008010_PA_BUSY(x) (((x) & 0x1) << 25)
+#define G_008010_PA_BUSY(x) (((x) >> 25) & 0x1)
+#define C_008010_PA_BUSY 0xFDFFFFFF
+#define S_008010_DB_BUSY(x) (((x) & 0x1) << 26)
+#define G_008010_DB_BUSY(x) (((x) >> 26) & 0x1)
+#define C_008010_DB_BUSY 0xFBFFFFFF
+#define S_008010_CP_COHERENCY_BUSY(x) (((x) & 0x1) << 28)
+#define G_008010_CP_COHERENCY_BUSY(x) (((x) >> 28) & 0x1)
+#define C_008010_CP_COHERENCY_BUSY 0xEFFFFFFF
+#define S_008010_CP_BUSY(x) (((x) & 0x1) << 29)
+#define G_008010_CP_BUSY(x) (((x) >> 29) & 0x1)
+#define C_008010_CP_BUSY 0xDFFFFFFF
+#define S_008010_CB_BUSY(x) (((x) & 0x1) << 30)
+#define G_008010_CB_BUSY(x) (((x) >> 30) & 0x1)
+#define C_008010_CB_BUSY 0xBFFFFFFF
+#define S_008010_GUI_ACTIVE(x) (((x) & 0x1) << 31)
+#define G_008010_GUI_ACTIVE(x) (((x) >> 31) & 0x1)
+#define C_008010_GUI_ACTIVE 0x7FFFFFFF
#define GRBM_GFX_INDEX 0x802C
#define INSTANCE_INDEX(x) ((x) << 0)
#define SH_INDEX(x) ((x) << 8)
@@ -276,12 +668,155 @@
#define C_0085F0_SH_ICACHE_ACTION_ENA 0xDFFFFFFF
#define R_0085F4_CP_COHER_SIZE 0x0085F4
#define R_0085F8_CP_COHER_BASE 0x0085F8
-
+#define R_008014_GRBM_STATUS_SE0 0x008014
+#define S_008014_DB_CLEAN(x) (((x) & 0x1) << 1)
+#define G_008014_DB_CLEAN(x) (((x) >> 1) & 0x1)
+#define C_008014_DB_CLEAN 0xFFFFFFFD
+#define S_008014_CB_CLEAN(x) (((x) & 0x1) << 2)
+#define G_008014_CB_CLEAN(x) (((x) >> 2) & 0x1)
+#define C_008014_CB_CLEAN 0xFFFFFFFB
+#define S_008014_BCI_BUSY(x) (((x) & 0x1) << 22)
+#define G_008014_BCI_BUSY(x) (((x) >> 22) & 0x1)
+#define C_008014_BCI_BUSY 0xFFBFFFFF
+#define S_008014_VGT_BUSY(x) (((x) & 0x1) << 23)
+#define G_008014_VGT_BUSY(x) (((x) >> 23) & 0x1)
+#define C_008014_VGT_BUSY 0xFF7FFFFF
+#define S_008014_PA_BUSY(x) (((x) & 0x1) << 24)
+#define G_008014_PA_BUSY(x) (((x) >> 24) & 0x1)
+#define C_008014_PA_BUSY 0xFEFFFFFF
+#define S_008014_TA_BUSY(x) (((x) & 0x1) << 25)
+#define G_008014_TA_BUSY(x) (((x) >> 25) & 0x1)
+#define C_008014_TA_BUSY 0xFDFFFFFF
+#define S_008014_SX_BUSY(x) (((x) & 0x1) << 26)
+#define G_008014_SX_BUSY(x) (((x) >> 26) & 0x1)
+#define C_008014_SX_BUSY 0xFBFFFFFF
+#define S_008014_SPI_BUSY(x) (((x) & 0x1) << 27)
+#define G_008014_SPI_BUSY(x) (((x) >> 27) & 0x1)
+#define C_008014_SPI_BUSY 0xF7FFFFFF
+#define S_008014_SC_BUSY(x) (((x) & 0x1) << 29)
+#define G_008014_SC_BUSY(x) (((x) >> 29) & 0x1)
+#define C_008014_SC_BUSY 0xDFFFFFFF
+#define S_008014_DB_BUSY(x) (((x) & 0x1) << 30)
+#define G_008014_DB_BUSY(x) (((x) >> 30) & 0x1)
+#define C_008014_DB_BUSY 0xBFFFFFFF
+#define S_008014_CB_BUSY(x) (((x) & 0x1) << 31)
+#define G_008014_CB_BUSY(x) (((x) >> 31) & 0x1)
+#define C_008014_CB_BUSY 0x7FFFFFFF
+#define R_008018_GRBM_STATUS_SE1 0x008018
+#define S_008018_DB_CLEAN(x) (((x) & 0x1) << 1)
+#define G_008018_DB_CLEAN(x) (((x) >> 1) & 0x1)
+#define C_008018_DB_CLEAN 0xFFFFFFFD
+#define S_008018_CB_CLEAN(x) (((x) & 0x1) << 2)
+#define G_008018_CB_CLEAN(x) (((x) >> 2) & 0x1)
+#define C_008018_CB_CLEAN 0xFFFFFFFB
+#define S_008018_BCI_BUSY(x) (((x) & 0x1) << 22)
+#define G_008018_BCI_BUSY(x) (((x) >> 22) & 0x1)
+#define C_008018_BCI_BUSY 0xFFBFFFFF
+#define S_008018_VGT_BUSY(x) (((x) & 0x1) << 23)
+#define G_008018_VGT_BUSY(x) (((x) >> 23) & 0x1)
+#define C_008018_VGT_BUSY 0xFF7FFFFF
+#define S_008018_PA_BUSY(x) (((x) & 0x1) << 24)
+#define G_008018_PA_BUSY(x) (((x) >> 24) & 0x1)
+#define C_008018_PA_BUSY 0xFEFFFFFF
+#define S_008018_TA_BUSY(x) (((x) & 0x1) << 25)
+#define G_008018_TA_BUSY(x) (((x) >> 25) & 0x1)
+#define C_008018_TA_BUSY 0xFDFFFFFF
+#define S_008018_SX_BUSY(x) (((x) & 0x1) << 26)
+#define G_008018_SX_BUSY(x) (((x) >> 26) & 0x1)
+#define C_008018_SX_BUSY 0xFBFFFFFF
+#define S_008018_SPI_BUSY(x) (((x) & 0x1) << 27)
+#define G_008018_SPI_BUSY(x) (((x) >> 27) & 0x1)
+#define C_008018_SPI_BUSY 0xF7FFFFFF
+#define S_008018_SC_BUSY(x) (((x) & 0x1) << 29)
+#define G_008018_SC_BUSY(x) (((x) >> 29) & 0x1)
+#define C_008018_SC_BUSY 0xDFFFFFFF
+#define S_008018_DB_BUSY(x) (((x) & 0x1) << 30)
+#define G_008018_DB_BUSY(x) (((x) >> 30) & 0x1)
+#define C_008018_DB_BUSY 0xBFFFFFFF
+#define S_008018_CB_BUSY(x) (((x) & 0x1) << 31)
+#define G_008018_CB_BUSY(x) (((x) >> 31) & 0x1)
+#define C_008018_CB_BUSY 0x7FFFFFFF
+#define R_008038_GRBM_STATUS_SE2 0x008038
+#define S_008038_DB_CLEAN(x) (((x) & 0x1) << 1)
+#define G_008038_DB_CLEAN(x) (((x) >> 1) & 0x1)
+#define C_008038_DB_CLEAN 0xFFFFFFFD
+#define S_008038_CB_CLEAN(x) (((x) & 0x1) << 2)
+#define G_008038_CB_CLEAN(x) (((x) >> 2) & 0x1)
+#define C_008038_CB_CLEAN 0xFFFFFFFB
+#define S_008038_BCI_BUSY(x) (((x) & 0x1) << 22)
+#define G_008038_BCI_BUSY(x) (((x) >> 22) & 0x1)
+#define C_008038_BCI_BUSY 0xFFBFFFFF
+#define S_008038_VGT_BUSY(x) (((x) & 0x1) << 23)
+#define G_008038_VGT_BUSY(x) (((x) >> 23) & 0x1)
+#define C_008038_VGT_BUSY 0xFF7FFFFF
+#define S_008038_PA_BUSY(x) (((x) & 0x1) << 24)
+#define G_008038_PA_BUSY(x) (((x) >> 24) & 0x1)
+#define C_008038_PA_BUSY 0xFEFFFFFF
+#define S_008038_TA_BUSY(x) (((x) & 0x1) << 25)
+#define G_008038_TA_BUSY(x) (((x) >> 25) & 0x1)
+#define C_008038_TA_BUSY 0xFDFFFFFF
+#define S_008038_SX_BUSY(x) (((x) & 0x1) << 26)
+#define G_008038_SX_BUSY(x) (((x) >> 26) & 0x1)
+#define C_008038_SX_BUSY 0xFBFFFFFF
+#define S_008038_SPI_BUSY(x) (((x) & 0x1) << 27)
+#define G_008038_SPI_BUSY(x) (((x) >> 27) & 0x1)
+#define C_008038_SPI_BUSY 0xF7FFFFFF
+#define S_008038_SC_BUSY(x) (((x) & 0x1) << 29)
+#define G_008038_SC_BUSY(x) (((x) >> 29) & 0x1)
+#define C_008038_SC_BUSY 0xDFFFFFFF
+#define S_008038_DB_BUSY(x) (((x) & 0x1) << 30)
+#define G_008038_DB_BUSY(x) (((x) >> 30) & 0x1)
+#define C_008038_DB_BUSY 0xBFFFFFFF
+#define S_008038_CB_BUSY(x) (((x) & 0x1) << 31)
+#define G_008038_CB_BUSY(x) (((x) >> 31) & 0x1)
+#define C_008038_CB_BUSY 0x7FFFFFFF
+#define R_00803C_GRBM_STATUS_SE3 0x00803C
+#define S_00803C_DB_CLEAN(x) (((x) & 0x1) << 1)
+#define G_00803C_DB_CLEAN(x) (((x) >> 1) & 0x1)
+#define C_00803C_DB_CLEAN 0xFFFFFFFD
+#define S_00803C_CB_CLEAN(x) (((x) & 0x1) << 2)
+#define G_00803C_CB_CLEAN(x) (((x) >> 2) & 0x1)
+#define C_00803C_CB_CLEAN 0xFFFFFFFB
+#define S_00803C_BCI_BUSY(x) (((x) & 0x1) << 22)
+#define G_00803C_BCI_BUSY(x) (((x) >> 22) & 0x1)
+#define C_00803C_BCI_BUSY 0xFFBFFFFF
+#define S_00803C_VGT_BUSY(x) (((x) & 0x1) << 23)
+#define G_00803C_VGT_BUSY(x) (((x) >> 23) & 0x1)
+#define C_00803C_VGT_BUSY 0xFF7FFFFF
+#define S_00803C_PA_BUSY(x) (((x) & 0x1) << 24)
+#define G_00803C_PA_BUSY(x) (((x) >> 24) & 0x1)
+#define C_00803C_PA_BUSY 0xFEFFFFFF
+#define S_00803C_TA_BUSY(x) (((x) & 0x1) << 25)
+#define G_00803C_TA_BUSY(x) (((x) >> 25) & 0x1)
+#define C_00803C_TA_BUSY 0xFDFFFFFF
+#define S_00803C_SX_BUSY(x) (((x) & 0x1) << 26)
+#define G_00803C_SX_BUSY(x) (((x) >> 26) & 0x1)
+#define C_00803C_SX_BUSY 0xFBFFFFFF
+#define S_00803C_SPI_BUSY(x) (((x) & 0x1) << 27)
+#define G_00803C_SPI_BUSY(x) (((x) >> 27) & 0x1)
+#define C_00803C_SPI_BUSY 0xF7FFFFFF
+#define S_00803C_SC_BUSY(x) (((x) & 0x1) << 29)
+#define G_00803C_SC_BUSY(x) (((x) >> 29) & 0x1)
+#define C_00803C_SC_BUSY 0xDFFFFFFF
+#define S_00803C_DB_BUSY(x) (((x) & 0x1) << 30)
+#define G_00803C_DB_BUSY(x) (((x) >> 30) & 0x1)
+#define C_00803C_DB_BUSY 0xBFFFFFFF
+#define S_00803C_CB_BUSY(x) (((x) & 0x1) << 31)
+#define G_00803C_CB_BUSY(x) (((x) >> 31) & 0x1)
+#define C_00803C_CB_BUSY 0x7FFFFFFF
/* CIK */
+#define R_0300FC_CP_STRMOUT_CNTL 0x0300FC
+#define S_0300FC_OFFSET_UPDATE_DONE(x) (((x) & 0x1) << 0)
+#define G_0300FC_OFFSET_UPDATE_DONE(x) (((x) >> 0) & 0x1)
+#define C_0300FC_OFFSET_UPDATE_DONE 0xFFFFFFFE
#define R_0301E4_CP_COHER_BASE_HI 0x0301E4
#define S_0301E4_COHER_BASE_HI_256B(x) (((x) & 0xFF) << 0)
#define G_0301E4_COHER_BASE_HI_256B(x) (((x) >> 0) & 0xFF)
#define C_0301E4_COHER_BASE_HI_256B 0xFFFFFF00
+#define R_0301EC_CP_COHER_START_DELAY 0x0301EC
+#define S_0301EC_START_DELAY_COUNT(x) (((x) & 0x3F) << 0)
+#define G_0301EC_START_DELAY_COUNT(x) (((x) >> 0) & 0x3F)
+#define C_0301EC_START_DELAY_COUNT 0xFFFFFFC0
#define R_0301F0_CP_COHER_CNTL 0x0301F0
#define S_0301F0_DEST_BASE_0_ENA(x) (((x) & 0x1) << 0)
#define G_0301F0_DEST_BASE_0_ENA(x) (((x) >> 0) & 0x1)
@@ -289,6 +824,14 @@
#define S_0301F0_DEST_BASE_1_ENA(x) (((x) & 0x1) << 1)
#define G_0301F0_DEST_BASE_1_ENA(x) (((x) >> 1) & 0x1)
#define C_0301F0_DEST_BASE_1_ENA 0xFFFFFFFD
+/* VI */
+#define S_0301F0_TC_SD_ACTION_ENA(x) (((x) & 0x1) << 2)
+#define G_0301F0_TC_SD_ACTION_ENA(x) (((x) >> 2) & 0x1)
+#define C_0301F0_TC_SD_ACTION_ENA 0xFFFFFFFB
+#define S_0301F0_TC_NC_ACTION_ENA(x) (((x) & 0x1) << 3)
+#define G_0301F0_TC_NC_ACTION_ENA(x) (((x) >> 3) & 0x1)
+#define C_0301F0_TC_NC_ACTION_ENA 0xFFFFFFF7
+/* */
#define S_0301F0_CB0_DEST_BASE_ENA(x) (((x) & 0x1) << 6)
#define G_0301F0_CB0_DEST_BASE_ENA(x) (((x) >> 6) & 0x1)
#define C_0301F0_CB0_DEST_BASE_ENA 0xFFFFFFBF
@@ -319,7 +862,7 @@
#define S_0301F0_TCL1_VOL_ACTION_ENA(x) (((x) & 0x1) << 15)
#define G_0301F0_TCL1_VOL_ACTION_ENA(x) (((x) >> 15) & 0x1)
#define C_0301F0_TCL1_VOL_ACTION_ENA 0xFFFF7FFF
-#define S_0301F0_TC_VOL_ACTION_ENA(x) (((x) & 0x1) << 16)
+#define S_0301F0_TC_VOL_ACTION_ENA(x) (((x) & 0x1) << 16) /* not on VI */
#define G_0301F0_TC_VOL_ACTION_ENA(x) (((x) >> 16) & 0x1)
#define C_0301F0_TC_VOL_ACTION_ENA 0xFFFEFFFF
#define S_0301F0_TC_WB_ACTION_ENA(x) (((x) & 0x1) << 18)
@@ -352,8 +895,389 @@
#define S_0301F0_SH_ICACHE_ACTION_ENA(x) (((x) & 0x1) << 29)
#define G_0301F0_SH_ICACHE_ACTION_ENA(x) (((x) >> 29) & 0x1)
#define C_0301F0_SH_ICACHE_ACTION_ENA 0xDFFFFFFF
+/* VI */
+#define S_0301F0_SH_KCACHE_WB_ACTION_ENA(x) (((x) & 0x1) << 30)
+#define G_0301F0_SH_KCACHE_WB_ACTION_ENA(x) (((x) >> 30) & 0x1)
+#define C_0301F0_SH_KCACHE_WB_ACTION_ENA 0xBFFFFFFF
+#define S_0301F0_SH_SD_ACTION_ENA(x) (((x) & 0x1) << 31)
+#define G_0301F0_SH_SD_ACTION_ENA(x) (((x) >> 31) & 0x1)
+#define C_0301F0_SH_SD_ACTION_ENA 0x7FFFFFFF
+/* */
#define R_0301F4_CP_COHER_SIZE 0x0301F4
#define R_0301F8_CP_COHER_BASE 0x0301F8
+#define R_0301FC_CP_COHER_STATUS 0x0301FC
+#define S_0301FC_MATCHING_GFX_CNTX(x) (((x) & 0xFF) << 0)
+#define G_0301FC_MATCHING_GFX_CNTX(x) (((x) >> 0) & 0xFF)
+#define C_0301FC_MATCHING_GFX_CNTX 0xFFFFFF00
+#define S_0301FC_MEID(x) (((x) & 0x03) << 24)
+#define G_0301FC_MEID(x) (((x) >> 24) & 0x03)
+#define C_0301FC_MEID 0xFCFFFFFF
+#define S_0301FC_PHASE1_STATUS(x) (((x) & 0x1) << 30)
+#define G_0301FC_PHASE1_STATUS(x) (((x) >> 30) & 0x1)
+#define C_0301FC_PHASE1_STATUS 0xBFFFFFFF
+#define S_0301FC_STATUS(x) (((x) & 0x1) << 31)
+#define G_0301FC_STATUS(x) (((x) >> 31) & 0x1)
+#define C_0301FC_STATUS 0x7FFFFFFF
+#define R_008210_CP_CPC_STATUS 0x008210
+#define S_008210_MEC1_BUSY(x) (((x) & 0x1) << 0)
+#define G_008210_MEC1_BUSY(x) (((x) >> 0) & 0x1)
+#define C_008210_MEC1_BUSY 0xFFFFFFFE
+#define S_008210_MEC2_BUSY(x) (((x) & 0x1) << 1)
+#define G_008210_MEC2_BUSY(x) (((x) >> 1) & 0x1)
+#define C_008210_MEC2_BUSY 0xFFFFFFFD
+#define S_008210_DC0_BUSY(x) (((x) & 0x1) << 2)
+#define G_008210_DC0_BUSY(x) (((x) >> 2) & 0x1)
+#define C_008210_DC0_BUSY 0xFFFFFFFB
+#define S_008210_DC1_BUSY(x) (((x) & 0x1) << 3)
+#define G_008210_DC1_BUSY(x) (((x) >> 3) & 0x1)
+#define C_008210_DC1_BUSY 0xFFFFFFF7
+#define S_008210_RCIU1_BUSY(x) (((x) & 0x1) << 4)
+#define G_008210_RCIU1_BUSY(x) (((x) >> 4) & 0x1)
+#define C_008210_RCIU1_BUSY 0xFFFFFFEF
+#define S_008210_RCIU2_BUSY(x) (((x) & 0x1) << 5)
+#define G_008210_RCIU2_BUSY(x) (((x) >> 5) & 0x1)
+#define C_008210_RCIU2_BUSY 0xFFFFFFDF
+#define S_008210_ROQ1_BUSY(x) (((x) & 0x1) << 6)
+#define G_008210_ROQ1_BUSY(x) (((x) >> 6) & 0x1)
+#define C_008210_ROQ1_BUSY 0xFFFFFFBF
+#define S_008210_ROQ2_BUSY(x) (((x) & 0x1) << 7)
+#define G_008210_ROQ2_BUSY(x) (((x) >> 7) & 0x1)
+#define C_008210_ROQ2_BUSY 0xFFFFFF7F
+#define S_008210_TCIU_BUSY(x) (((x) & 0x1) << 10)
+#define G_008210_TCIU_BUSY(x) (((x) >> 10) & 0x1)
+#define C_008210_TCIU_BUSY 0xFFFFFBFF
+#define S_008210_SCRATCH_RAM_BUSY(x) (((x) & 0x1) << 11)
+#define G_008210_SCRATCH_RAM_BUSY(x) (((x) >> 11) & 0x1)
+#define C_008210_SCRATCH_RAM_BUSY 0xFFFFF7FF
+#define S_008210_QU_BUSY(x) (((x) & 0x1) << 12)
+#define G_008210_QU_BUSY(x) (((x) >> 12) & 0x1)
+#define C_008210_QU_BUSY 0xFFFFEFFF
+#define S_008210_ATCL2IU_BUSY(x) (((x) & 0x1) << 13)
+#define G_008210_ATCL2IU_BUSY(x) (((x) >> 13) & 0x1)
+#define C_008210_ATCL2IU_BUSY 0xFFFFDFFF
+#define S_008210_CPG_CPC_BUSY(x) (((x) & 0x1) << 29)
+#define G_008210_CPG_CPC_BUSY(x) (((x) >> 29) & 0x1)
+#define C_008210_CPG_CPC_BUSY 0xDFFFFFFF
+#define S_008210_CPF_CPC_BUSY(x) (((x) & 0x1) << 30)
+#define G_008210_CPF_CPC_BUSY(x) (((x) >> 30) & 0x1)
+#define C_008210_CPF_CPC_BUSY 0xBFFFFFFF
+#define S_008210_CPC_BUSY(x) (((x) & 0x1) << 31)
+#define G_008210_CPC_BUSY(x) (((x) >> 31) & 0x1)
+#define C_008210_CPC_BUSY 0x7FFFFFFF
+#define R_008214_CP_CPC_BUSY_STAT 0x008214
+#define S_008214_MEC1_LOAD_BUSY(x) (((x) & 0x1) << 0)
+#define G_008214_MEC1_LOAD_BUSY(x) (((x) >> 0) & 0x1)
+#define C_008214_MEC1_LOAD_BUSY 0xFFFFFFFE
+#define S_008214_MEC1_SEMAPOHRE_BUSY(x) (((x) & 0x1) << 1)
+#define G_008214_MEC1_SEMAPOHRE_BUSY(x) (((x) >> 1) & 0x1)
+#define C_008214_MEC1_SEMAPOHRE_BUSY 0xFFFFFFFD
+#define S_008214_MEC1_MUTEX_BUSY(x) (((x) & 0x1) << 2)
+#define G_008214_MEC1_MUTEX_BUSY(x) (((x) >> 2) & 0x1)
+#define C_008214_MEC1_MUTEX_BUSY 0xFFFFFFFB
+#define S_008214_MEC1_MESSAGE_BUSY(x) (((x) & 0x1) << 3)
+#define G_008214_MEC1_MESSAGE_BUSY(x) (((x) >> 3) & 0x1)
+#define C_008214_MEC1_MESSAGE_BUSY 0xFFFFFFF7
+#define S_008214_MEC1_EOP_QUEUE_BUSY(x) (((x) & 0x1) << 4)
+#define G_008214_MEC1_EOP_QUEUE_BUSY(x) (((x) >> 4) & 0x1)
+#define C_008214_MEC1_EOP_QUEUE_BUSY 0xFFFFFFEF
+#define S_008214_MEC1_IQ_QUEUE_BUSY(x) (((x) & 0x1) << 5)
+#define G_008214_MEC1_IQ_QUEUE_BUSY(x) (((x) >> 5) & 0x1)
+#define C_008214_MEC1_IQ_QUEUE_BUSY 0xFFFFFFDF
+#define S_008214_MEC1_IB_QUEUE_BUSY(x) (((x) & 0x1) << 6)
+#define G_008214_MEC1_IB_QUEUE_BUSY(x) (((x) >> 6) & 0x1)
+#define C_008214_MEC1_IB_QUEUE_BUSY 0xFFFFFFBF
+#define S_008214_MEC1_TC_BUSY(x) (((x) & 0x1) << 7)
+#define G_008214_MEC1_TC_BUSY(x) (((x) >> 7) & 0x1)
+#define C_008214_MEC1_TC_BUSY 0xFFFFFF7F
+#define S_008214_MEC1_DMA_BUSY(x) (((x) & 0x1) << 8)
+#define G_008214_MEC1_DMA_BUSY(x) (((x) >> 8) & 0x1)
+#define C_008214_MEC1_DMA_BUSY 0xFFFFFEFF
+#define S_008214_MEC1_PARTIAL_FLUSH_BUSY(x) (((x) & 0x1) << 9)
+#define G_008214_MEC1_PARTIAL_FLUSH_BUSY(x) (((x) >> 9) & 0x1)
+#define C_008214_MEC1_PARTIAL_FLUSH_BUSY 0xFFFFFDFF
+#define S_008214_MEC1_PIPE0_BUSY(x) (((x) & 0x1) << 10)
+#define G_008214_MEC1_PIPE0_BUSY(x) (((x) >> 10) & 0x1)
+#define C_008214_MEC1_PIPE0_BUSY 0xFFFFFBFF
+#define S_008214_MEC1_PIPE1_BUSY(x) (((x) & 0x1) << 11)
+#define G_008214_MEC1_PIPE1_BUSY(x) (((x) >> 11) & 0x1)
+#define C_008214_MEC1_PIPE1_BUSY 0xFFFFF7FF
+#define S_008214_MEC1_PIPE2_BUSY(x) (((x) & 0x1) << 12)
+#define G_008214_MEC1_PIPE2_BUSY(x) (((x) >> 12) & 0x1)
+#define C_008214_MEC1_PIPE2_BUSY 0xFFFFEFFF
+#define S_008214_MEC1_PIPE3_BUSY(x) (((x) & 0x1) << 13)
+#define G_008214_MEC1_PIPE3_BUSY(x) (((x) >> 13) & 0x1)
+#define C_008214_MEC1_PIPE3_BUSY 0xFFFFDFFF
+#define S_008214_MEC2_LOAD_BUSY(x) (((x) & 0x1) << 16)
+#define G_008214_MEC2_LOAD_BUSY(x) (((x) >> 16) & 0x1)
+#define C_008214_MEC2_LOAD_BUSY 0xFFFEFFFF
+#define S_008214_MEC2_SEMAPOHRE_BUSY(x) (((x) & 0x1) << 17)
+#define G_008214_MEC2_SEMAPOHRE_BUSY(x) (((x) >> 17) & 0x1)
+#define C_008214_MEC2_SEMAPOHRE_BUSY 0xFFFDFFFF
+#define S_008214_MEC2_MUTEX_BUSY(x) (((x) & 0x1) << 18)
+#define G_008214_MEC2_MUTEX_BUSY(x) (((x) >> 18) & 0x1)
+#define C_008214_MEC2_MUTEX_BUSY 0xFFFBFFFF
+#define S_008214_MEC2_MESSAGE_BUSY(x) (((x) & 0x1) << 19)
+#define G_008214_MEC2_MESSAGE_BUSY(x) (((x) >> 19) & 0x1)
+#define C_008214_MEC2_MESSAGE_BUSY 0xFFF7FFFF
+#define S_008214_MEC2_EOP_QUEUE_BUSY(x) (((x) & 0x1) << 20)
+#define G_008214_MEC2_EOP_QUEUE_BUSY(x) (((x) >> 20) & 0x1)
+#define C_008214_MEC2_EOP_QUEUE_BUSY 0xFFEFFFFF
+#define S_008214_MEC2_IQ_QUEUE_BUSY(x) (((x) & 0x1) << 21)
+#define G_008214_MEC2_IQ_QUEUE_BUSY(x) (((x) >> 21) & 0x1)
+#define C_008214_MEC2_IQ_QUEUE_BUSY 0xFFDFFFFF
+#define S_008214_MEC2_IB_QUEUE_BUSY(x) (((x) & 0x1) << 22)
+#define G_008214_MEC2_IB_QUEUE_BUSY(x) (((x) >> 22) & 0x1)
+#define C_008214_MEC2_IB_QUEUE_BUSY 0xFFBFFFFF
+#define S_008214_MEC2_TC_BUSY(x) (((x) & 0x1) << 23)
+#define G_008214_MEC2_TC_BUSY(x) (((x) >> 23) & 0x1)
+#define C_008214_MEC2_TC_BUSY 0xFF7FFFFF
+#define S_008214_MEC2_DMA_BUSY(x) (((x) & 0x1) << 24)
+#define G_008214_MEC2_DMA_BUSY(x) (((x) >> 24) & 0x1)
+#define C_008214_MEC2_DMA_BUSY 0xFEFFFFFF
+#define S_008214_MEC2_PARTIAL_FLUSH_BUSY(x) (((x) & 0x1) << 25)
+#define G_008214_MEC2_PARTIAL_FLUSH_BUSY(x) (((x) >> 25) & 0x1)
+#define C_008214_MEC2_PARTIAL_FLUSH_BUSY 0xFDFFFFFF
+#define S_008214_MEC2_PIPE0_BUSY(x) (((x) & 0x1) << 26)
+#define G_008214_MEC2_PIPE0_BUSY(x) (((x) >> 26) & 0x1)
+#define C_008214_MEC2_PIPE0_BUSY 0xFBFFFFFF
+#define S_008214_MEC2_PIPE1_BUSY(x) (((x) & 0x1) << 27)
+#define G_008214_MEC2_PIPE1_BUSY(x) (((x) >> 27) & 0x1)
+#define C_008214_MEC2_PIPE1_BUSY 0xF7FFFFFF
+#define S_008214_MEC2_PIPE2_BUSY(x) (((x) & 0x1) << 28)
+#define G_008214_MEC2_PIPE2_BUSY(x) (((x) >> 28) & 0x1)
+#define C_008214_MEC2_PIPE2_BUSY 0xEFFFFFFF
+#define S_008214_MEC2_PIPE3_BUSY(x) (((x) & 0x1) << 29)
+#define G_008214_MEC2_PIPE3_BUSY(x) (((x) >> 29) & 0x1)
+#define C_008214_MEC2_PIPE3_BUSY 0xDFFFFFFF
+#define R_008218_CP_CPC_STALLED_STAT1 0x008218
+#define S_008218_RCIU_TX_FREE_STALL(x) (((x) & 0x1) << 3)
+#define G_008218_RCIU_TX_FREE_STALL(x) (((x) >> 3) & 0x1)
+#define C_008218_RCIU_TX_FREE_STALL 0xFFFFFFF7
+#define S_008218_RCIU_PRIV_VIOLATION(x) (((x) & 0x1) << 4)
+#define G_008218_RCIU_PRIV_VIOLATION(x) (((x) >> 4) & 0x1)
+#define C_008218_RCIU_PRIV_VIOLATION 0xFFFFFFEF
+#define S_008218_TCIU_TX_FREE_STALL(x) (((x) & 0x1) << 6)
+#define G_008218_TCIU_TX_FREE_STALL(x) (((x) >> 6) & 0x1)
+#define C_008218_TCIU_TX_FREE_STALL 0xFFFFFFBF
+#define S_008218_MEC1_DECODING_PACKET(x) (((x) & 0x1) << 8)
+#define G_008218_MEC1_DECODING_PACKET(x) (((x) >> 8) & 0x1)
+#define C_008218_MEC1_DECODING_PACKET 0xFFFFFEFF
+#define S_008218_MEC1_WAIT_ON_RCIU(x) (((x) & 0x1) << 9)
+#define G_008218_MEC1_WAIT_ON_RCIU(x) (((x) >> 9) & 0x1)
+#define C_008218_MEC1_WAIT_ON_RCIU 0xFFFFFDFF
+#define S_008218_MEC1_WAIT_ON_RCIU_READ(x) (((x) & 0x1) << 10)
+#define G_008218_MEC1_WAIT_ON_RCIU_READ(x) (((x) >> 10) & 0x1)
+#define C_008218_MEC1_WAIT_ON_RCIU_READ 0xFFFFFBFF
+#define S_008218_MEC1_WAIT_ON_ROQ_DATA(x) (((x) & 0x1) << 13)
+#define G_008218_MEC1_WAIT_ON_ROQ_DATA(x) (((x) >> 13) & 0x1)
+#define C_008218_MEC1_WAIT_ON_ROQ_DATA 0xFFFFDFFF
+#define S_008218_MEC2_DECODING_PACKET(x) (((x) & 0x1) << 16)
+#define G_008218_MEC2_DECODING_PACKET(x) (((x) >> 16) & 0x1)
+#define C_008218_MEC2_DECODING_PACKET 0xFFFEFFFF
+#define S_008218_MEC2_WAIT_ON_RCIU(x) (((x) & 0x1) << 17)
+#define G_008218_MEC2_WAIT_ON_RCIU(x) (((x) >> 17) & 0x1)
+#define C_008218_MEC2_WAIT_ON_RCIU 0xFFFDFFFF
+#define S_008218_MEC2_WAIT_ON_RCIU_READ(x) (((x) & 0x1) << 18)
+#define G_008218_MEC2_WAIT_ON_RCIU_READ(x) (((x) >> 18) & 0x1)
+#define C_008218_MEC2_WAIT_ON_RCIU_READ 0xFFFBFFFF
+#define S_008218_MEC2_WAIT_ON_ROQ_DATA(x) (((x) & 0x1) << 21)
+#define G_008218_MEC2_WAIT_ON_ROQ_DATA(x) (((x) >> 21) & 0x1)
+#define C_008218_MEC2_WAIT_ON_ROQ_DATA 0xFFDFFFFF
+#define S_008218_ATCL2IU_WAITING_ON_FREE(x) (((x) & 0x1) << 22)
+#define G_008218_ATCL2IU_WAITING_ON_FREE(x) (((x) >> 22) & 0x1)
+#define C_008218_ATCL2IU_WAITING_ON_FREE 0xFFBFFFFF
+#define S_008218_ATCL2IU_WAITING_ON_TAGS(x) (((x) & 0x1) << 23)
+#define G_008218_ATCL2IU_WAITING_ON_TAGS(x) (((x) >> 23) & 0x1)
+#define C_008218_ATCL2IU_WAITING_ON_TAGS 0xFF7FFFFF
+#define S_008218_ATCL1_WAITING_ON_TRANS(x) (((x) & 0x1) << 24)
+#define G_008218_ATCL1_WAITING_ON_TRANS(x) (((x) >> 24) & 0x1)
+#define C_008218_ATCL1_WAITING_ON_TRANS 0xFEFFFFFF
+#define R_00821C_CP_CPF_STATUS 0x00821C
+#define S_00821C_POST_WPTR_GFX_BUSY(x) (((x) & 0x1) << 0)
+#define G_00821C_POST_WPTR_GFX_BUSY(x) (((x) >> 0) & 0x1)
+#define C_00821C_POST_WPTR_GFX_BUSY 0xFFFFFFFE
+#define S_00821C_CSF_BUSY(x) (((x) & 0x1) << 1)
+#define G_00821C_CSF_BUSY(x) (((x) >> 1) & 0x1)
+#define C_00821C_CSF_BUSY 0xFFFFFFFD
+#define S_00821C_ROQ_ALIGN_BUSY(x) (((x) & 0x1) << 4)
+#define G_00821C_ROQ_ALIGN_BUSY(x) (((x) >> 4) & 0x1)
+#define C_00821C_ROQ_ALIGN_BUSY 0xFFFFFFEF
+#define S_00821C_ROQ_RING_BUSY(x) (((x) & 0x1) << 5)
+#define G_00821C_ROQ_RING_BUSY(x) (((x) >> 5) & 0x1)
+#define C_00821C_ROQ_RING_BUSY 0xFFFFFFDF
+#define S_00821C_ROQ_INDIRECT1_BUSY(x) (((x) & 0x1) << 6)
+#define G_00821C_ROQ_INDIRECT1_BUSY(x) (((x) >> 6) & 0x1)
+#define C_00821C_ROQ_INDIRECT1_BUSY 0xFFFFFFBF
+#define S_00821C_ROQ_INDIRECT2_BUSY(x) (((x) & 0x1) << 7)
+#define G_00821C_ROQ_INDIRECT2_BUSY(x) (((x) >> 7) & 0x1)
+#define C_00821C_ROQ_INDIRECT2_BUSY 0xFFFFFF7F
+#define S_00821C_ROQ_STATE_BUSY(x) (((x) & 0x1) << 8)
+#define G_00821C_ROQ_STATE_BUSY(x) (((x) >> 8) & 0x1)
+#define C_00821C_ROQ_STATE_BUSY 0xFFFFFEFF
+#define S_00821C_ROQ_CE_RING_BUSY(x) (((x) & 0x1) << 9)
+#define G_00821C_ROQ_CE_RING_BUSY(x) (((x) >> 9) & 0x1)
+#define C_00821C_ROQ_CE_RING_BUSY 0xFFFFFDFF
+#define S_00821C_ROQ_CE_INDIRECT1_BUSY(x) (((x) & 0x1) << 10)
+#define G_00821C_ROQ_CE_INDIRECT1_BUSY(x) (((x) >> 10) & 0x1)
+#define C_00821C_ROQ_CE_INDIRECT1_BUSY 0xFFFFFBFF
+#define S_00821C_ROQ_CE_INDIRECT2_BUSY(x) (((x) & 0x1) << 11)
+#define G_00821C_ROQ_CE_INDIRECT2_BUSY(x) (((x) >> 11) & 0x1)
+#define C_00821C_ROQ_CE_INDIRECT2_BUSY 0xFFFFF7FF
+#define S_00821C_SEMAPHORE_BUSY(x) (((x) & 0x1) << 12)
+#define G_00821C_SEMAPHORE_BUSY(x) (((x) >> 12) & 0x1)
+#define C_00821C_SEMAPHORE_BUSY 0xFFFFEFFF
+#define S_00821C_INTERRUPT_BUSY(x) (((x) & 0x1) << 13)
+#define G_00821C_INTERRUPT_BUSY(x) (((x) >> 13) & 0x1)
+#define C_00821C_INTERRUPT_BUSY 0xFFFFDFFF
+#define S_00821C_TCIU_BUSY(x) (((x) & 0x1) << 14)
+#define G_00821C_TCIU_BUSY(x) (((x) >> 14) & 0x1)
+#define C_00821C_TCIU_BUSY 0xFFFFBFFF
+#define S_00821C_HQD_BUSY(x) (((x) & 0x1) << 15)
+#define G_00821C_HQD_BUSY(x) (((x) >> 15) & 0x1)
+#define C_00821C_HQD_BUSY 0xFFFF7FFF
+#define S_00821C_PRT_BUSY(x) (((x) & 0x1) << 16)
+#define G_00821C_PRT_BUSY(x) (((x) >> 16) & 0x1)
+#define C_00821C_PRT_BUSY 0xFFFEFFFF
+#define S_00821C_ATCL2IU_BUSY(x) (((x) & 0x1) << 17)
+#define G_00821C_ATCL2IU_BUSY(x) (((x) >> 17) & 0x1)
+#define C_00821C_ATCL2IU_BUSY 0xFFFDFFFF
+#define S_00821C_CPF_GFX_BUSY(x) (((x) & 0x1) << 26)
+#define G_00821C_CPF_GFX_BUSY(x) (((x) >> 26) & 0x1)
+#define C_00821C_CPF_GFX_BUSY 0xFBFFFFFF
+#define S_00821C_CPF_CMP_BUSY(x) (((x) & 0x1) << 27)
+#define G_00821C_CPF_CMP_BUSY(x) (((x) >> 27) & 0x1)
+#define C_00821C_CPF_CMP_BUSY 0xF7FFFFFF
+#define S_00821C_GRBM_CPF_STAT_BUSY(x) (((x) & 0x03) << 28)
+#define G_00821C_GRBM_CPF_STAT_BUSY(x) (((x) >> 28) & 0x03)
+#define C_00821C_GRBM_CPF_STAT_BUSY 0xCFFFFFFF
+#define S_00821C_CPC_CPF_BUSY(x) (((x) & 0x1) << 30)
+#define G_00821C_CPC_CPF_BUSY(x) (((x) >> 30) & 0x1)
+#define C_00821C_CPC_CPF_BUSY 0xBFFFFFFF
+#define S_00821C_CPF_BUSY(x) (((x) & 0x1) << 31)
+#define G_00821C_CPF_BUSY(x) (((x) >> 31) & 0x1)
+#define C_00821C_CPF_BUSY 0x7FFFFFFF
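/*
 * The S_/G_/C_ triplets above follow the usual convention for these
 * generated headers: S_ shifts a field value into position, G_ extracts
 * it from a register word, and C_ is the complement mask used to clear
 * the field before a read-modify-write.  A minimal decode sketch for the
 * read-only CP_CPF_STATUS word; dump_cpf_status() and how the value was
 * read back are assumptions for illustration, not part of this header.
 */
#include <stdint.h>
#include <stdio.h>

static void dump_cpf_status(uint32_t v)
{
   if (G_00821C_CPF_BUSY(v))
      printf("CPF busy: gfx=%u cmp=%u grbm_stat=%u\n",
             G_00821C_CPF_GFX_BUSY(v),
             G_00821C_CPF_CMP_BUSY(v),
             G_00821C_GRBM_CPF_STAT_BUSY(v));
}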
+#define R_008220_CP_CPF_BUSY_STAT 0x008220
+#define S_008220_REG_BUS_FIFO_BUSY(x) (((x) & 0x1) << 0)
+#define G_008220_REG_BUS_FIFO_BUSY(x) (((x) >> 0) & 0x1)
+#define C_008220_REG_BUS_FIFO_BUSY 0xFFFFFFFE
+#define S_008220_CSF_RING_BUSY(x) (((x) & 0x1) << 1)
+#define G_008220_CSF_RING_BUSY(x) (((x) >> 1) & 0x1)
+#define C_008220_CSF_RING_BUSY 0xFFFFFFFD
+#define S_008220_CSF_INDIRECT1_BUSY(x) (((x) & 0x1) << 2)
+#define G_008220_CSF_INDIRECT1_BUSY(x) (((x) >> 2) & 0x1)
+#define C_008220_CSF_INDIRECT1_BUSY 0xFFFFFFFB
+#define S_008220_CSF_INDIRECT2_BUSY(x) (((x) & 0x1) << 3)
+#define G_008220_CSF_INDIRECT2_BUSY(x) (((x) >> 3) & 0x1)
+#define C_008220_CSF_INDIRECT2_BUSY 0xFFFFFFF7
+#define S_008220_CSF_STATE_BUSY(x) (((x) & 0x1) << 4)
+#define G_008220_CSF_STATE_BUSY(x) (((x) >> 4) & 0x1)
+#define C_008220_CSF_STATE_BUSY 0xFFFFFFEF
+#define S_008220_CSF_CE_INDR1_BUSY(x) (((x) & 0x1) << 5)
+#define G_008220_CSF_CE_INDR1_BUSY(x) (((x) >> 5) & 0x1)
+#define C_008220_CSF_CE_INDR1_BUSY 0xFFFFFFDF
+#define S_008220_CSF_CE_INDR2_BUSY(x) (((x) & 0x1) << 6)
+#define G_008220_CSF_CE_INDR2_BUSY(x) (((x) >> 6) & 0x1)
+#define C_008220_CSF_CE_INDR2_BUSY 0xFFFFFFBF
+#define S_008220_CSF_ARBITER_BUSY(x) (((x) & 0x1) << 7)
+#define G_008220_CSF_ARBITER_BUSY(x) (((x) >> 7) & 0x1)
+#define C_008220_CSF_ARBITER_BUSY 0xFFFFFF7F
+#define S_008220_CSF_INPUT_BUSY(x) (((x) & 0x1) << 8)
+#define G_008220_CSF_INPUT_BUSY(x) (((x) >> 8) & 0x1)
+#define C_008220_CSF_INPUT_BUSY 0xFFFFFEFF
+#define S_008220_OUTSTANDING_READ_TAGS(x) (((x) & 0x1) << 9)
+#define G_008220_OUTSTANDING_READ_TAGS(x) (((x) >> 9) & 0x1)
+#define C_008220_OUTSTANDING_READ_TAGS 0xFFFFFDFF
+#define S_008220_HPD_PROCESSING_EOP_BUSY(x) (((x) & 0x1) << 11)
+#define G_008220_HPD_PROCESSING_EOP_BUSY(x) (((x) >> 11) & 0x1)
+#define C_008220_HPD_PROCESSING_EOP_BUSY 0xFFFFF7FF
+#define S_008220_HQD_DISPATCH_BUSY(x) (((x) & 0x1) << 12)
+#define G_008220_HQD_DISPATCH_BUSY(x) (((x) >> 12) & 0x1)
+#define C_008220_HQD_DISPATCH_BUSY 0xFFFFEFFF
+#define S_008220_HQD_IQ_TIMER_BUSY(x) (((x) & 0x1) << 13)
+#define G_008220_HQD_IQ_TIMER_BUSY(x) (((x) >> 13) & 0x1)
+#define C_008220_HQD_IQ_TIMER_BUSY 0xFFFFDFFF
+#define S_008220_HQD_DMA_OFFLOAD_BUSY(x) (((x) & 0x1) << 14)
+#define G_008220_HQD_DMA_OFFLOAD_BUSY(x) (((x) >> 14) & 0x1)
+#define C_008220_HQD_DMA_OFFLOAD_BUSY 0xFFFFBFFF
+#define S_008220_HQD_WAIT_SEMAPHORE_BUSY(x) (((x) & 0x1) << 15)
+#define G_008220_HQD_WAIT_SEMAPHORE_BUSY(x) (((x) >> 15) & 0x1)
+#define C_008220_HQD_WAIT_SEMAPHORE_BUSY 0xFFFF7FFF
+#define S_008220_HQD_SIGNAL_SEMAPHORE_BUSY(x) (((x) & 0x1) << 16)
+#define G_008220_HQD_SIGNAL_SEMAPHORE_BUSY(x) (((x) >> 16) & 0x1)
+#define C_008220_HQD_SIGNAL_SEMAPHORE_BUSY 0xFFFEFFFF
+#define S_008220_HQD_MESSAGE_BUSY(x) (((x) & 0x1) << 17)
+#define G_008220_HQD_MESSAGE_BUSY(x) (((x) >> 17) & 0x1)
+#define C_008220_HQD_MESSAGE_BUSY 0xFFFDFFFF
+#define S_008220_HQD_PQ_FETCHER_BUSY(x) (((x) & 0x1) << 18)
+#define G_008220_HQD_PQ_FETCHER_BUSY(x) (((x) >> 18) & 0x1)
+#define C_008220_HQD_PQ_FETCHER_BUSY 0xFFFBFFFF
+#define S_008220_HQD_IB_FETCHER_BUSY(x) (((x) & 0x1) << 19)
+#define G_008220_HQD_IB_FETCHER_BUSY(x) (((x) >> 19) & 0x1)
+#define C_008220_HQD_IB_FETCHER_BUSY 0xFFF7FFFF
+#define S_008220_HQD_IQ_FETCHER_BUSY(x) (((x) & 0x1) << 20)
+#define G_008220_HQD_IQ_FETCHER_BUSY(x) (((x) >> 20) & 0x1)
+#define C_008220_HQD_IQ_FETCHER_BUSY 0xFFEFFFFF
+#define S_008220_HQD_EOP_FETCHER_BUSY(x) (((x) & 0x1) << 21)
+#define G_008220_HQD_EOP_FETCHER_BUSY(x) (((x) >> 21) & 0x1)
+#define C_008220_HQD_EOP_FETCHER_BUSY 0xFFDFFFFF
+#define S_008220_HQD_CONSUMED_RPTR_BUSY(x) (((x) & 0x1) << 22)
+#define G_008220_HQD_CONSUMED_RPTR_BUSY(x) (((x) >> 22) & 0x1)
+#define C_008220_HQD_CONSUMED_RPTR_BUSY 0xFFBFFFFF
+#define S_008220_HQD_FETCHER_ARB_BUSY(x) (((x) & 0x1) << 23)
+#define G_008220_HQD_FETCHER_ARB_BUSY(x) (((x) >> 23) & 0x1)
+#define C_008220_HQD_FETCHER_ARB_BUSY 0xFF7FFFFF
+#define S_008220_HQD_ROQ_ALIGN_BUSY(x) (((x) & 0x1) << 24)
+#define G_008220_HQD_ROQ_ALIGN_BUSY(x) (((x) >> 24) & 0x1)
+#define C_008220_HQD_ROQ_ALIGN_BUSY 0xFEFFFFFF
+#define S_008220_HQD_ROQ_EOP_BUSY(x) (((x) & 0x1) << 25)
+#define G_008220_HQD_ROQ_EOP_BUSY(x) (((x) >> 25) & 0x1)
+#define C_008220_HQD_ROQ_EOP_BUSY 0xFDFFFFFF
+#define S_008220_HQD_ROQ_IQ_BUSY(x) (((x) & 0x1) << 26)
+#define G_008220_HQD_ROQ_IQ_BUSY(x) (((x) >> 26) & 0x1)
+#define C_008220_HQD_ROQ_IQ_BUSY 0xFBFFFFFF
+#define S_008220_HQD_ROQ_PQ_BUSY(x) (((x) & 0x1) << 27)
+#define G_008220_HQD_ROQ_PQ_BUSY(x) (((x) >> 27) & 0x1)
+#define C_008220_HQD_ROQ_PQ_BUSY 0xF7FFFFFF
+#define S_008220_HQD_ROQ_IB_BUSY(x) (((x) & 0x1) << 28)
+#define G_008220_HQD_ROQ_IB_BUSY(x) (((x) >> 28) & 0x1)
+#define C_008220_HQD_ROQ_IB_BUSY 0xEFFFFFFF
+#define S_008220_HQD_WPTR_POLL_BUSY(x) (((x) & 0x1) << 29)
+#define G_008220_HQD_WPTR_POLL_BUSY(x) (((x) >> 29) & 0x1)
+#define C_008220_HQD_WPTR_POLL_BUSY 0xDFFFFFFF
+#define S_008220_HQD_PQ_BUSY(x) (((x) & 0x1) << 30)
+#define G_008220_HQD_PQ_BUSY(x) (((x) >> 30) & 0x1)
+#define C_008220_HQD_PQ_BUSY 0xBFFFFFFF
+#define S_008220_HQD_IB_BUSY(x) (((x) & 0x1) << 31)
+#define G_008220_HQD_IB_BUSY(x) (((x) >> 31) & 0x1)
+#define C_008220_HQD_IB_BUSY 0x7FFFFFFF
+#define R_008224_CP_CPF_STALLED_STAT1 0x008224
+#define S_008224_RING_FETCHING_DATA(x) (((x) & 0x1) << 0)
+#define G_008224_RING_FETCHING_DATA(x) (((x) >> 0) & 0x1)
+#define C_008224_RING_FETCHING_DATA 0xFFFFFFFE
+#define S_008224_INDR1_FETCHING_DATA(x) (((x) & 0x1) << 1)
+#define G_008224_INDR1_FETCHING_DATA(x) (((x) >> 1) & 0x1)
+#define C_008224_INDR1_FETCHING_DATA 0xFFFFFFFD
+#define S_008224_INDR2_FETCHING_DATA(x) (((x) & 0x1) << 2)
+#define G_008224_INDR2_FETCHING_DATA(x) (((x) >> 2) & 0x1)
+#define C_008224_INDR2_FETCHING_DATA 0xFFFFFFFB
+#define S_008224_STATE_FETCHING_DATA(x) (((x) & 0x1) << 3)
+#define G_008224_STATE_FETCHING_DATA(x) (((x) >> 3) & 0x1)
+#define C_008224_STATE_FETCHING_DATA 0xFFFFFFF7
+#define S_008224_TCIU_WAITING_ON_FREE(x) (((x) & 0x1) << 5)
+#define G_008224_TCIU_WAITING_ON_FREE(x) (((x) >> 5) & 0x1)
+#define C_008224_TCIU_WAITING_ON_FREE 0xFFFFFFDF
+#define S_008224_TCIU_WAITING_ON_TAGS(x) (((x) & 0x1) << 6)
+#define G_008224_TCIU_WAITING_ON_TAGS(x) (((x) >> 6) & 0x1)
+#define C_008224_TCIU_WAITING_ON_TAGS 0xFFFFFFBF
+#define S_008224_ATCL2IU_WAITING_ON_FREE(x) (((x) & 0x1) << 7)
+#define G_008224_ATCL2IU_WAITING_ON_FREE(x) (((x) >> 7) & 0x1)
+#define C_008224_ATCL2IU_WAITING_ON_FREE 0xFFFFFF7F
+#define S_008224_ATCL2IU_WAITING_ON_TAGS(x) (((x) & 0x1) << 8)
+#define G_008224_ATCL2IU_WAITING_ON_TAGS(x) (((x) >> 8) & 0x1)
+#define C_008224_ATCL2IU_WAITING_ON_TAGS 0xFFFFFEFF
+#define S_008224_ATCL1_WAITING_ON_TRANS(x) (((x) & 0x1) << 9)
+#define G_008224_ATCL1_WAITING_ON_TRANS(x) (((x) >> 9) & 0x1)
+#define C_008224_ATCL1_WAITING_ON_TRANS 0xFFFFFDFF
#define R_030230_CP_COHER_SIZE_HI 0x030230
#define S_030230_COHER_SIZE_HI_256B(x) (((x) & 0xFF) << 0)
#define G_030230_COHER_SIZE_HI_256B(x) (((x) >> 0) & 0xFF)
@@ -375,10 +1299,6 @@
#define C_0088C4_ES_LIMIT 0xFFE0FFFF
#define R_0088C8_VGT_ESGS_RING_SIZE 0x0088C8
#define R_0088CC_VGT_GSVS_RING_SIZE 0x0088CC
-/* CIK */
-#define R_030900_VGT_ESGS_RING_SIZE 0x030900
-#define R_030904_VGT_GSVS_RING_SIZE 0x030904
-/* */
#define R_0088D4_VGT_GS_VERTEX_REUSE 0x0088D4
#define S_0088D4_VERT_REUSE(x) (((x) & 0x1F) << 0)
#define G_0088D4_VERT_REUSE(x) (((x) >> 0) & 0x1F)
@@ -461,7 +1381,293 @@
#define S_008B10_CURRENT_COUNT(x) (((x) & 0xFF) << 8)
#define G_008B10_CURRENT_COUNT(x) (((x) >> 8) & 0xFF)
#define C_008B10_CURRENT_COUNT 0xFFFF00FF
+#define R_008670_CP_STALLED_STAT3 0x008670
+#define S_008670_CE_TO_CSF_NOT_RDY_TO_RCV(x) (((x) & 0x1) << 0)
+#define G_008670_CE_TO_CSF_NOT_RDY_TO_RCV(x) (((x) >> 0) & 0x1)
+#define C_008670_CE_TO_CSF_NOT_RDY_TO_RCV 0xFFFFFFFE
+#define S_008670_CE_TO_RAM_INIT_FETCHER_NOT_RDY_TO_RCV(x) (((x) & 0x1) << 1)
+#define G_008670_CE_TO_RAM_INIT_FETCHER_NOT_RDY_TO_RCV(x) (((x) >> 1) & 0x1)
+#define C_008670_CE_TO_RAM_INIT_FETCHER_NOT_RDY_TO_RCV 0xFFFFFFFD
+#define S_008670_CE_WAITING_ON_DATA_FROM_RAM_INIT_FETCHER(x) (((x) & 0x1) << 2)
+#define G_008670_CE_WAITING_ON_DATA_FROM_RAM_INIT_FETCHER(x) (((x) >> 2) & 0x1)
+#define C_008670_CE_WAITING_ON_DATA_FROM_RAM_INIT_FETCHER 0xFFFFFFFB
+#define S_008670_CE_TO_RAM_INIT_NOT_RDY(x) (((x) & 0x1) << 3)
+#define G_008670_CE_TO_RAM_INIT_NOT_RDY(x) (((x) >> 3) & 0x1)
+#define C_008670_CE_TO_RAM_INIT_NOT_RDY 0xFFFFFFF7
+#define S_008670_CE_TO_RAM_DUMP_NOT_RDY(x) (((x) & 0x1) << 4)
+#define G_008670_CE_TO_RAM_DUMP_NOT_RDY(x) (((x) >> 4) & 0x1)
+#define C_008670_CE_TO_RAM_DUMP_NOT_RDY 0xFFFFFFEF
+#define S_008670_CE_TO_RAM_WRITE_NOT_RDY(x) (((x) & 0x1) << 5)
+#define G_008670_CE_TO_RAM_WRITE_NOT_RDY(x) (((x) >> 5) & 0x1)
+#define C_008670_CE_TO_RAM_WRITE_NOT_RDY 0xFFFFFFDF
+#define S_008670_CE_TO_INC_FIFO_NOT_RDY_TO_RCV(x) (((x) & 0x1) << 6)
+#define G_008670_CE_TO_INC_FIFO_NOT_RDY_TO_RCV(x) (((x) >> 6) & 0x1)
+#define C_008670_CE_TO_INC_FIFO_NOT_RDY_TO_RCV 0xFFFFFFBF
+#define S_008670_CE_TO_WR_FIFO_NOT_RDY_TO_RCV(x) (((x) & 0x1) << 7)
+#define G_008670_CE_TO_WR_FIFO_NOT_RDY_TO_RCV(x) (((x) >> 7) & 0x1)
+#define C_008670_CE_TO_WR_FIFO_NOT_RDY_TO_RCV 0xFFFFFF7F
+#define S_008670_CE_WAITING_ON_BUFFER_DATA(x) (((x) & 0x1) << 10)
+#define G_008670_CE_WAITING_ON_BUFFER_DATA(x) (((x) >> 10) & 0x1)
+#define C_008670_CE_WAITING_ON_BUFFER_DATA 0xFFFFFBFF
+#define S_008670_CE_WAITING_ON_CE_BUFFER_FLAG(x) (((x) & 0x1) << 11)
+#define G_008670_CE_WAITING_ON_CE_BUFFER_FLAG(x) (((x) >> 11) & 0x1)
+#define C_008670_CE_WAITING_ON_CE_BUFFER_FLAG 0xFFFFF7FF
+#define S_008670_CE_WAITING_ON_DE_COUNTER(x) (((x) & 0x1) << 12)
+#define G_008670_CE_WAITING_ON_DE_COUNTER(x) (((x) >> 12) & 0x1)
+#define C_008670_CE_WAITING_ON_DE_COUNTER 0xFFFFEFFF
+#define S_008670_CE_WAITING_ON_DE_COUNTER_UNDERFLOW(x) (((x) & 0x1) << 13)
+#define G_008670_CE_WAITING_ON_DE_COUNTER_UNDERFLOW(x) (((x) >> 13) & 0x1)
+#define C_008670_CE_WAITING_ON_DE_COUNTER_UNDERFLOW 0xFFFFDFFF
+#define S_008670_TCIU_WAITING_ON_FREE(x) (((x) & 0x1) << 14)
+#define G_008670_TCIU_WAITING_ON_FREE(x) (((x) >> 14) & 0x1)
+#define C_008670_TCIU_WAITING_ON_FREE 0xFFFFBFFF
+#define S_008670_TCIU_WAITING_ON_TAGS(x) (((x) & 0x1) << 15)
+#define G_008670_TCIU_WAITING_ON_TAGS(x) (((x) >> 15) & 0x1)
+#define C_008670_TCIU_WAITING_ON_TAGS 0xFFFF7FFF
+#define S_008670_CE_STALLED_ON_TC_WR_CONFIRM(x) (((x) & 0x1) << 16)
+#define G_008670_CE_STALLED_ON_TC_WR_CONFIRM(x) (((x) >> 16) & 0x1)
+#define C_008670_CE_STALLED_ON_TC_WR_CONFIRM 0xFFFEFFFF
+#define S_008670_CE_STALLED_ON_ATOMIC_RTN_DATA(x) (((x) & 0x1) << 17)
+#define G_008670_CE_STALLED_ON_ATOMIC_RTN_DATA(x) (((x) >> 17) & 0x1)
+#define C_008670_CE_STALLED_ON_ATOMIC_RTN_DATA 0xFFFDFFFF
+#define S_008670_ATCL2IU_WAITING_ON_FREE(x) (((x) & 0x1) << 18)
+#define G_008670_ATCL2IU_WAITING_ON_FREE(x) (((x) >> 18) & 0x1)
+#define C_008670_ATCL2IU_WAITING_ON_FREE 0xFFFBFFFF
+#define S_008670_ATCL2IU_WAITING_ON_TAGS(x) (((x) & 0x1) << 19)
+#define G_008670_ATCL2IU_WAITING_ON_TAGS(x) (((x) >> 19) & 0x1)
+#define C_008670_ATCL2IU_WAITING_ON_TAGS 0xFFF7FFFF
+#define S_008670_ATCL1_WAITING_ON_TRANS(x) (((x) & 0x1) << 20)
+#define G_008670_ATCL1_WAITING_ON_TRANS(x) (((x) >> 20) & 0x1)
+#define C_008670_ATCL1_WAITING_ON_TRANS 0xFFEFFFFF
+#define R_008674_CP_STALLED_STAT1 0x008674
+#define S_008674_RBIU_TO_DMA_NOT_RDY_TO_RCV(x) (((x) & 0x1) << 0)
+#define G_008674_RBIU_TO_DMA_NOT_RDY_TO_RCV(x) (((x) >> 0) & 0x1)
+#define C_008674_RBIU_TO_DMA_NOT_RDY_TO_RCV 0xFFFFFFFE
+#define S_008674_RBIU_TO_SEM_NOT_RDY_TO_RCV(x) (((x) & 0x1) << 2)
+#define G_008674_RBIU_TO_SEM_NOT_RDY_TO_RCV(x) (((x) >> 2) & 0x1)
+#define C_008674_RBIU_TO_SEM_NOT_RDY_TO_RCV 0xFFFFFFFB
+#define S_008674_RBIU_TO_MEMWR_NOT_RDY_TO_RCV(x) (((x) & 0x1) << 4)
+#define G_008674_RBIU_TO_MEMWR_NOT_RDY_TO_RCV(x) (((x) >> 4) & 0x1)
+#define C_008674_RBIU_TO_MEMWR_NOT_RDY_TO_RCV 0xFFFFFFEF
+#define S_008674_ME_HAS_ACTIVE_CE_BUFFER_FLAG(x) (((x) & 0x1) << 10)
+#define G_008674_ME_HAS_ACTIVE_CE_BUFFER_FLAG(x) (((x) >> 10) & 0x1)
+#define C_008674_ME_HAS_ACTIVE_CE_BUFFER_FLAG 0xFFFFFBFF
+#define S_008674_ME_HAS_ACTIVE_DE_BUFFER_FLAG(x) (((x) & 0x1) << 11)
+#define G_008674_ME_HAS_ACTIVE_DE_BUFFER_FLAG(x) (((x) >> 11) & 0x1)
+#define C_008674_ME_HAS_ACTIVE_DE_BUFFER_FLAG 0xFFFFF7FF
+#define S_008674_ME_STALLED_ON_TC_WR_CONFIRM(x) (((x) & 0x1) << 12)
+#define G_008674_ME_STALLED_ON_TC_WR_CONFIRM(x) (((x) >> 12) & 0x1)
+#define C_008674_ME_STALLED_ON_TC_WR_CONFIRM 0xFFFFEFFF
+#define S_008674_ME_STALLED_ON_ATOMIC_RTN_DATA(x) (((x) & 0x1) << 13)
+#define G_008674_ME_STALLED_ON_ATOMIC_RTN_DATA(x) (((x) >> 13) & 0x1)
+#define C_008674_ME_STALLED_ON_ATOMIC_RTN_DATA 0xFFFFDFFF
+#define S_008674_ME_WAITING_ON_TC_READ_DATA(x) (((x) & 0x1) << 14)
+#define G_008674_ME_WAITING_ON_TC_READ_DATA(x) (((x) >> 14) & 0x1)
+#define C_008674_ME_WAITING_ON_TC_READ_DATA 0xFFFFBFFF
+#define S_008674_ME_WAITING_ON_REG_READ_DATA(x) (((x) & 0x1) << 15)
+#define G_008674_ME_WAITING_ON_REG_READ_DATA(x) (((x) >> 15) & 0x1)
+#define C_008674_ME_WAITING_ON_REG_READ_DATA 0xFFFF7FFF
+#define S_008674_RCIU_WAITING_ON_GDS_FREE(x) (((x) & 0x1) << 23)
+#define G_008674_RCIU_WAITING_ON_GDS_FREE(x) (((x) >> 23) & 0x1)
+#define C_008674_RCIU_WAITING_ON_GDS_FREE 0xFF7FFFFF
+#define S_008674_RCIU_WAITING_ON_GRBM_FREE(x) (((x) & 0x1) << 24)
+#define G_008674_RCIU_WAITING_ON_GRBM_FREE(x) (((x) >> 24) & 0x1)
+#define C_008674_RCIU_WAITING_ON_GRBM_FREE 0xFEFFFFFF
+#define S_008674_RCIU_WAITING_ON_VGT_FREE(x) (((x) & 0x1) << 25)
+#define G_008674_RCIU_WAITING_ON_VGT_FREE(x) (((x) >> 25) & 0x1)
+#define C_008674_RCIU_WAITING_ON_VGT_FREE 0xFDFFFFFF
+#define S_008674_RCIU_STALLED_ON_ME_READ(x) (((x) & 0x1) << 26)
+#define G_008674_RCIU_STALLED_ON_ME_READ(x) (((x) >> 26) & 0x1)
+#define C_008674_RCIU_STALLED_ON_ME_READ 0xFBFFFFFF
+#define S_008674_RCIU_STALLED_ON_DMA_READ(x) (((x) & 0x1) << 27)
+#define G_008674_RCIU_STALLED_ON_DMA_READ(x) (((x) >> 27) & 0x1)
+#define C_008674_RCIU_STALLED_ON_DMA_READ 0xF7FFFFFF
+#define S_008674_RCIU_STALLED_ON_APPEND_READ(x) (((x) & 0x1) << 28)
+#define G_008674_RCIU_STALLED_ON_APPEND_READ(x) (((x) >> 28) & 0x1)
+#define C_008674_RCIU_STALLED_ON_APPEND_READ 0xEFFFFFFF
+#define S_008674_RCIU_HALTED_BY_REG_VIOLATION(x) (((x) & 0x1) << 29)
+#define G_008674_RCIU_HALTED_BY_REG_VIOLATION(x) (((x) >> 29) & 0x1)
+#define C_008674_RCIU_HALTED_BY_REG_VIOLATION 0xDFFFFFFF
+#define R_008678_CP_STALLED_STAT2 0x008678
+#define S_008678_PFP_TO_CSF_NOT_RDY_TO_RCV(x) (((x) & 0x1) << 0)
+#define G_008678_PFP_TO_CSF_NOT_RDY_TO_RCV(x) (((x) >> 0) & 0x1)
+#define C_008678_PFP_TO_CSF_NOT_RDY_TO_RCV 0xFFFFFFFE
+#define S_008678_PFP_TO_MEQ_NOT_RDY_TO_RCV(x) (((x) & 0x1) << 1)
+#define G_008678_PFP_TO_MEQ_NOT_RDY_TO_RCV(x) (((x) >> 1) & 0x1)
+#define C_008678_PFP_TO_MEQ_NOT_RDY_TO_RCV 0xFFFFFFFD
+#define S_008678_PFP_TO_RCIU_NOT_RDY_TO_RCV(x) (((x) & 0x1) << 2)
+#define G_008678_PFP_TO_RCIU_NOT_RDY_TO_RCV(x) (((x) >> 2) & 0x1)
+#define C_008678_PFP_TO_RCIU_NOT_RDY_TO_RCV 0xFFFFFFFB
+#define S_008678_PFP_TO_VGT_WRITES_PENDING(x) (((x) & 0x1) << 4)
+#define G_008678_PFP_TO_VGT_WRITES_PENDING(x) (((x) >> 4) & 0x1)
+#define C_008678_PFP_TO_VGT_WRITES_PENDING 0xFFFFFFEF
+#define S_008678_PFP_RCIU_READ_PENDING(x) (((x) & 0x1) << 5)
+#define G_008678_PFP_RCIU_READ_PENDING(x) (((x) >> 5) & 0x1)
+#define C_008678_PFP_RCIU_READ_PENDING 0xFFFFFFDF
+#define S_008678_PFP_WAITING_ON_BUFFER_DATA(x) (((x) & 0x1) << 8)
+#define G_008678_PFP_WAITING_ON_BUFFER_DATA(x) (((x) >> 8) & 0x1)
+#define C_008678_PFP_WAITING_ON_BUFFER_DATA 0xFFFFFEFF
+#define S_008678_ME_WAIT_ON_CE_COUNTER(x) (((x) & 0x1) << 9)
+#define G_008678_ME_WAIT_ON_CE_COUNTER(x) (((x) >> 9) & 0x1)
+#define C_008678_ME_WAIT_ON_CE_COUNTER 0xFFFFFDFF
+#define S_008678_ME_WAIT_ON_AVAIL_BUFFER(x) (((x) & 0x1) << 10)
+#define G_008678_ME_WAIT_ON_AVAIL_BUFFER(x) (((x) >> 10) & 0x1)
+#define C_008678_ME_WAIT_ON_AVAIL_BUFFER 0xFFFFFBFF
+#define S_008678_GFX_CNTX_NOT_AVAIL_TO_ME(x) (((x) & 0x1) << 11)
+#define G_008678_GFX_CNTX_NOT_AVAIL_TO_ME(x) (((x) >> 11) & 0x1)
+#define C_008678_GFX_CNTX_NOT_AVAIL_TO_ME 0xFFFFF7FF
+#define S_008678_ME_RCIU_NOT_RDY_TO_RCV(x) (((x) & 0x1) << 12)
+#define G_008678_ME_RCIU_NOT_RDY_TO_RCV(x) (((x) >> 12) & 0x1)
+#define C_008678_ME_RCIU_NOT_RDY_TO_RCV 0xFFFFEFFF
+#define S_008678_ME_TO_CONST_NOT_RDY_TO_RCV(x) (((x) & 0x1) << 13)
+#define G_008678_ME_TO_CONST_NOT_RDY_TO_RCV(x) (((x) >> 13) & 0x1)
+#define C_008678_ME_TO_CONST_NOT_RDY_TO_RCV 0xFFFFDFFF
+#define S_008678_ME_WAITING_DATA_FROM_PFP(x) (((x) & 0x1) << 14)
+#define G_008678_ME_WAITING_DATA_FROM_PFP(x) (((x) >> 14) & 0x1)
+#define C_008678_ME_WAITING_DATA_FROM_PFP 0xFFFFBFFF
+#define S_008678_ME_WAITING_ON_PARTIAL_FLUSH(x) (((x) & 0x1) << 15)
+#define G_008678_ME_WAITING_ON_PARTIAL_FLUSH(x) (((x) >> 15) & 0x1)
+#define C_008678_ME_WAITING_ON_PARTIAL_FLUSH 0xFFFF7FFF
+#define S_008678_MEQ_TO_ME_NOT_RDY_TO_RCV(x) (((x) & 0x1) << 16)
+#define G_008678_MEQ_TO_ME_NOT_RDY_TO_RCV(x) (((x) >> 16) & 0x1)
+#define C_008678_MEQ_TO_ME_NOT_RDY_TO_RCV 0xFFFEFFFF
+#define S_008678_STQ_TO_ME_NOT_RDY_TO_RCV(x) (((x) & 0x1) << 17)
+#define G_008678_STQ_TO_ME_NOT_RDY_TO_RCV(x) (((x) >> 17) & 0x1)
+#define C_008678_STQ_TO_ME_NOT_RDY_TO_RCV 0xFFFDFFFF
+#define S_008678_ME_WAITING_DATA_FROM_STQ(x) (((x) & 0x1) << 18)
+#define G_008678_ME_WAITING_DATA_FROM_STQ(x) (((x) >> 18) & 0x1)
+#define C_008678_ME_WAITING_DATA_FROM_STQ 0xFFFBFFFF
+#define S_008678_PFP_STALLED_ON_TC_WR_CONFIRM(x) (((x) & 0x1) << 19)
+#define G_008678_PFP_STALLED_ON_TC_WR_CONFIRM(x) (((x) >> 19) & 0x1)
+#define C_008678_PFP_STALLED_ON_TC_WR_CONFIRM 0xFFF7FFFF
+#define S_008678_PFP_STALLED_ON_ATOMIC_RTN_DATA(x) (((x) & 0x1) << 20)
+#define G_008678_PFP_STALLED_ON_ATOMIC_RTN_DATA(x) (((x) >> 20) & 0x1)
+#define C_008678_PFP_STALLED_ON_ATOMIC_RTN_DATA 0xFFEFFFFF
+#define S_008678_EOPD_FIFO_NEEDS_SC_EOP_DONE(x) (((x) & 0x1) << 21)
+#define G_008678_EOPD_FIFO_NEEDS_SC_EOP_DONE(x) (((x) >> 21) & 0x1)
+#define C_008678_EOPD_FIFO_NEEDS_SC_EOP_DONE 0xFFDFFFFF
+#define S_008678_EOPD_FIFO_NEEDS_WR_CONFIRM(x) (((x) & 0x1) << 22)
+#define G_008678_EOPD_FIFO_NEEDS_WR_CONFIRM(x) (((x) >> 22) & 0x1)
+#define C_008678_EOPD_FIFO_NEEDS_WR_CONFIRM 0xFFBFFFFF
+#define S_008678_STRMO_WR_OF_PRIM_DATA_PENDING(x) (((x) & 0x1) << 23)
+#define G_008678_STRMO_WR_OF_PRIM_DATA_PENDING(x) (((x) >> 23) & 0x1)
+#define C_008678_STRMO_WR_OF_PRIM_DATA_PENDING 0xFF7FFFFF
+#define S_008678_PIPE_STATS_WR_DATA_PENDING(x) (((x) & 0x1) << 24)
+#define G_008678_PIPE_STATS_WR_DATA_PENDING(x) (((x) >> 24) & 0x1)
+#define C_008678_PIPE_STATS_WR_DATA_PENDING 0xFEFFFFFF
+#define S_008678_APPEND_RDY_WAIT_ON_CS_DONE(x) (((x) & 0x1) << 25)
+#define G_008678_APPEND_RDY_WAIT_ON_CS_DONE(x) (((x) >> 25) & 0x1)
+#define C_008678_APPEND_RDY_WAIT_ON_CS_DONE 0xFDFFFFFF
+#define S_008678_APPEND_RDY_WAIT_ON_PS_DONE(x) (((x) & 0x1) << 26)
+#define G_008678_APPEND_RDY_WAIT_ON_PS_DONE(x) (((x) >> 26) & 0x1)
+#define C_008678_APPEND_RDY_WAIT_ON_PS_DONE 0xFBFFFFFF
+#define S_008678_APPEND_WAIT_ON_WR_CONFIRM(x) (((x) & 0x1) << 27)
+#define G_008678_APPEND_WAIT_ON_WR_CONFIRM(x) (((x) >> 27) & 0x1)
+#define C_008678_APPEND_WAIT_ON_WR_CONFIRM 0xF7FFFFFF
+#define S_008678_APPEND_ACTIVE_PARTITION(x) (((x) & 0x1) << 28)
+#define G_008678_APPEND_ACTIVE_PARTITION(x) (((x) >> 28) & 0x1)
+#define C_008678_APPEND_ACTIVE_PARTITION 0xEFFFFFFF
+#define S_008678_APPEND_WAITING_TO_SEND_MEMWRITE(x) (((x) & 0x1) << 29)
+#define G_008678_APPEND_WAITING_TO_SEND_MEMWRITE(x) (((x) >> 29) & 0x1)
+#define C_008678_APPEND_WAITING_TO_SEND_MEMWRITE 0xDFFFFFFF
+#define S_008678_SURF_SYNC_NEEDS_IDLE_CNTXS(x) (((x) & 0x1) << 30)
+#define G_008678_SURF_SYNC_NEEDS_IDLE_CNTXS(x) (((x) >> 30) & 0x1)
+#define C_008678_SURF_SYNC_NEEDS_IDLE_CNTXS 0xBFFFFFFF
+#define S_008678_SURF_SYNC_NEEDS_ALL_CLEAN(x) (((x) & 0x1) << 31)
+#define G_008678_SURF_SYNC_NEEDS_ALL_CLEAN(x) (((x) >> 31) & 0x1)
+#define C_008678_SURF_SYNC_NEEDS_ALL_CLEAN 0x7FFFFFFF
+#define R_008680_CP_STAT 0x008680
+#define S_008680_ROQ_RING_BUSY(x) (((x) & 0x1) << 9)
+#define G_008680_ROQ_RING_BUSY(x) (((x) >> 9) & 0x1)
+#define C_008680_ROQ_RING_BUSY 0xFFFFFDFF
+#define S_008680_ROQ_INDIRECT1_BUSY(x) (((x) & 0x1) << 10)
+#define G_008680_ROQ_INDIRECT1_BUSY(x) (((x) >> 10) & 0x1)
+#define C_008680_ROQ_INDIRECT1_BUSY 0xFFFFFBFF
+#define S_008680_ROQ_INDIRECT2_BUSY(x) (((x) & 0x1) << 11)
+#define G_008680_ROQ_INDIRECT2_BUSY(x) (((x) >> 11) & 0x1)
+#define C_008680_ROQ_INDIRECT2_BUSY 0xFFFFF7FF
+#define S_008680_ROQ_STATE_BUSY(x) (((x) & 0x1) << 12)
+#define G_008680_ROQ_STATE_BUSY(x) (((x) >> 12) & 0x1)
+#define C_008680_ROQ_STATE_BUSY 0xFFFFEFFF
+#define S_008680_DC_BUSY(x) (((x) & 0x1) << 13)
+#define G_008680_DC_BUSY(x) (((x) >> 13) & 0x1)
+#define C_008680_DC_BUSY 0xFFFFDFFF
+#define S_008680_ATCL2IU_BUSY(x) (((x) & 0x1) << 14)
+#define G_008680_ATCL2IU_BUSY(x) (((x) >> 14) & 0x1)
+#define C_008680_ATCL2IU_BUSY 0xFFFFBFFF
+#define S_008680_PFP_BUSY(x) (((x) & 0x1) << 15)
+#define G_008680_PFP_BUSY(x) (((x) >> 15) & 0x1)
+#define C_008680_PFP_BUSY 0xFFFF7FFF
+#define S_008680_MEQ_BUSY(x) (((x) & 0x1) << 16)
+#define G_008680_MEQ_BUSY(x) (((x) >> 16) & 0x1)
+#define C_008680_MEQ_BUSY 0xFFFEFFFF
+#define S_008680_ME_BUSY(x) (((x) & 0x1) << 17)
+#define G_008680_ME_BUSY(x) (((x) >> 17) & 0x1)
+#define C_008680_ME_BUSY 0xFFFDFFFF
+#define S_008680_QUERY_BUSY(x) (((x) & 0x1) << 18)
+#define G_008680_QUERY_BUSY(x) (((x) >> 18) & 0x1)
+#define C_008680_QUERY_BUSY 0xFFFBFFFF
+#define S_008680_SEMAPHORE_BUSY(x) (((x) & 0x1) << 19)
+#define G_008680_SEMAPHORE_BUSY(x) (((x) >> 19) & 0x1)
+#define C_008680_SEMAPHORE_BUSY 0xFFF7FFFF
+#define S_008680_INTERRUPT_BUSY(x) (((x) & 0x1) << 20)
+#define G_008680_INTERRUPT_BUSY(x) (((x) >> 20) & 0x1)
+#define C_008680_INTERRUPT_BUSY 0xFFEFFFFF
+#define S_008680_SURFACE_SYNC_BUSY(x) (((x) & 0x1) << 21)
+#define G_008680_SURFACE_SYNC_BUSY(x) (((x) >> 21) & 0x1)
+#define C_008680_SURFACE_SYNC_BUSY 0xFFDFFFFF
+#define S_008680_DMA_BUSY(x) (((x) & 0x1) << 22)
+#define G_008680_DMA_BUSY(x) (((x) >> 22) & 0x1)
+#define C_008680_DMA_BUSY 0xFFBFFFFF
+#define S_008680_RCIU_BUSY(x) (((x) & 0x1) << 23)
+#define G_008680_RCIU_BUSY(x) (((x) >> 23) & 0x1)
+#define C_008680_RCIU_BUSY 0xFF7FFFFF
+#define S_008680_SCRATCH_RAM_BUSY(x) (((x) & 0x1) << 24)
+#define G_008680_SCRATCH_RAM_BUSY(x) (((x) >> 24) & 0x1)
+#define C_008680_SCRATCH_RAM_BUSY 0xFEFFFFFF
+#define S_008680_CPC_CPG_BUSY(x) (((x) & 0x1) << 25)
+#define G_008680_CPC_CPG_BUSY(x) (((x) >> 25) & 0x1)
+#define C_008680_CPC_CPG_BUSY 0xFDFFFFFF
+#define S_008680_CE_BUSY(x) (((x) & 0x1) << 26)
+#define G_008680_CE_BUSY(x) (((x) >> 26) & 0x1)
+#define C_008680_CE_BUSY 0xFBFFFFFF
+#define S_008680_TCIU_BUSY(x) (((x) & 0x1) << 27)
+#define G_008680_TCIU_BUSY(x) (((x) >> 27) & 0x1)
+#define C_008680_TCIU_BUSY 0xF7FFFFFF
+#define S_008680_ROQ_CE_RING_BUSY(x) (((x) & 0x1) << 28)
+#define G_008680_ROQ_CE_RING_BUSY(x) (((x) >> 28) & 0x1)
+#define C_008680_ROQ_CE_RING_BUSY 0xEFFFFFFF
+#define S_008680_ROQ_CE_INDIRECT1_BUSY(x) (((x) & 0x1) << 29)
+#define G_008680_ROQ_CE_INDIRECT1_BUSY(x) (((x) >> 29) & 0x1)
+#define C_008680_ROQ_CE_INDIRECT1_BUSY 0xDFFFFFFF
+#define S_008680_ROQ_CE_INDIRECT2_BUSY(x) (((x) & 0x1) << 30)
+#define G_008680_ROQ_CE_INDIRECT2_BUSY(x) (((x) >> 30) & 0x1)
+#define C_008680_ROQ_CE_INDIRECT2_BUSY 0xBFFFFFFF
+#define S_008680_CP_BUSY(x) (((x) & 0x1) << 31)
+#define G_008680_CP_BUSY(x) (((x) >> 31) & 0x1)
+#define C_008680_CP_BUSY 0x7FFFFFFF
/* CIK */
+#define R_030800_GRBM_GFX_INDEX 0x030800
+#define S_030800_INSTANCE_INDEX(x) (((x) & 0xFF) << 0)
+#define G_030800_INSTANCE_INDEX(x) (((x) >> 0) & 0xFF)
+#define C_030800_INSTANCE_INDEX 0xFFFFFF00
+#define S_030800_SH_INDEX(x) (((x) & 0xFF) << 8)
+#define G_030800_SH_INDEX(x) (((x) >> 8) & 0xFF)
+#define C_030800_SH_INDEX 0xFFFF00FF
+#define S_030800_SE_INDEX(x) (((x) & 0xFF) << 16)
+#define G_030800_SE_INDEX(x) (((x) >> 16) & 0xFF)
+#define C_030800_SE_INDEX 0xFF00FFFF
+#define S_030800_SH_BROADCAST_WRITES(x) (((x) & 0x1) << 29)
+#define G_030800_SH_BROADCAST_WRITES(x) (((x) >> 29) & 0x1)
+#define C_030800_SH_BROADCAST_WRITES 0xDFFFFFFF
+#define S_030800_INSTANCE_BROADCAST_WRITES(x) (((x) & 0x1) << 30)
+#define G_030800_INSTANCE_BROADCAST_WRITES(x) (((x) >> 30) & 0x1)
+#define C_030800_INSTANCE_BROADCAST_WRITES 0xBFFFFFFF
+#define S_030800_SE_BROADCAST_WRITES(x) (((x) & 0x1) << 31)
+#define G_030800_SE_BROADCAST_WRITES(x) (((x) >> 31) & 0x1)
+#define C_030800_SE_BROADCAST_WRITES 0x7FFFFFFF
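/*
 * Sketch of composing GRBM_GFX_INDEX: either target one shader engine
 * explicitly or set the broadcast bits so a subsequent register write
 * reaches every SE/SH/instance.  The helper names are illustrative,
 * not an API defined by this header.
 */
static inline uint32_t grbm_index_broadcast_all(void)
{
   return S_030800_SE_BROADCAST_WRITES(1) |
          S_030800_SH_BROADCAST_WRITES(1) |
          S_030800_INSTANCE_BROADCAST_WRITES(1);
}

static inline uint32_t grbm_index_select_se(unsigned se)
{
   /* Pick one SE, broadcast across its SHs and instances. */
   return S_030800_SE_INDEX(se) |
          S_030800_SH_BROADCAST_WRITES(1) |
          S_030800_INSTANCE_BROADCAST_WRITES(1);
}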
+#define R_030900_VGT_ESGS_RING_SIZE 0x030900
+#define R_030904_VGT_GSVS_RING_SIZE 0x030904
#define R_030908_VGT_PRIMITIVE_TYPE 0x030908
#define S_030908_PRIM_TYPE(x) (((x) & 0x3F) << 0)
#define G_030908_PRIM_TYPE(x) (((x) >> 0) & 0x3F)
@@ -530,6 +1736,34 @@
#define S_030A04_CURRENT_COUNT(x) (((x) & 0xFF) << 8)
#define G_030A04_CURRENT_COUNT(x) (((x) >> 8) & 0xFF)
#define C_030A04_CURRENT_COUNT 0xFFFF00FF
+#define R_030A10_PA_SC_SCREEN_EXTENT_MIN_0 0x030A10
+#define S_030A10_X(x) (((x) & 0xFFFF) << 0)
+#define G_030A10_X(x) (((x) >> 0) & 0xFFFF)
+#define C_030A10_X 0xFFFF0000
+#define S_030A10_Y(x) (((x) & 0xFFFF) << 16)
+#define G_030A10_Y(x) (((x) >> 16) & 0xFFFF)
+#define C_030A10_Y 0x0000FFFF
+#define R_030A14_PA_SC_SCREEN_EXTENT_MAX_0 0x030A14
+#define S_030A14_X(x) (((x) & 0xFFFF) << 0)
+#define G_030A14_X(x) (((x) >> 0) & 0xFFFF)
+#define C_030A14_X 0xFFFF0000
+#define S_030A14_Y(x) (((x) & 0xFFFF) << 16)
+#define G_030A14_Y(x) (((x) >> 16) & 0xFFFF)
+#define C_030A14_Y 0x0000FFFF
+#define R_030A18_PA_SC_SCREEN_EXTENT_MIN_1 0x030A18
+#define S_030A18_X(x) (((x) & 0xFFFF) << 0)
+#define G_030A18_X(x) (((x) >> 0) & 0xFFFF)
+#define C_030A18_X 0xFFFF0000
+#define S_030A18_Y(x) (((x) & 0xFFFF) << 16)
+#define G_030A18_Y(x) (((x) >> 16) & 0xFFFF)
+#define C_030A18_Y 0x0000FFFF
+#define R_030A2C_PA_SC_SCREEN_EXTENT_MAX_1 0x030A2C
+#define S_030A2C_X(x) (((x) & 0xFFFF) << 0)
+#define G_030A2C_X(x) (((x) >> 0) & 0xFFFF)
+#define C_030A2C_X 0xFFFF0000
+#define S_030A2C_Y(x) (((x) & 0xFFFF) << 16)
+#define G_030A2C_Y(x) (((x) >> 16) & 0xFFFF)
+#define C_030A2C_Y 0x0000FFFF
/* */
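/*
 * PA_SC_SCREEN_EXTENT_MIN/MAX pack an (x, y) screen coordinate pair into
 * one dword.  Sketch of encoding/decoding that layout, assuming plain
 * 16-bit integer coordinates as implied by the field masks above.
 */
static inline uint32_t screen_extent_pack(uint16_t x, uint16_t y)
{
   return S_030A10_X(x) | S_030A10_Y(y);
}

static inline void screen_extent_unpack(uint32_t v, uint16_t *x, uint16_t *y)
{
   *x = G_030A10_X(v);
   *y = G_030A10_Y(v);
}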
#define R_008BF0_PA_SC_ENHANCE 0x008BF0
#define S_008BF0_ENABLE_PA_SC_OUT_OF_ORDER(x) (((x) & 0x1) << 0)
@@ -608,6 +1842,32 @@
#define V_008DFC_SQ_VGPR 0x00
/* */
#define R_008DFC_SQ_INST 0x008DFC
+#define R_030D20_SQC_CACHES 0x030D20
+#define S_030D20_TARGET_INST(x) (((x) & 0x1) << 0)
+#define G_030D20_TARGET_INST(x) (((x) >> 0) & 0x1)
+#define C_030D20_TARGET_INST 0xFFFFFFFE
+#define S_030D20_TARGET_DATA(x) (((x) & 0x1) << 1)
+#define G_030D20_TARGET_DATA(x) (((x) >> 1) & 0x1)
+#define C_030D20_TARGET_DATA 0xFFFFFFFD
+#define S_030D20_INVALIDATE(x) (((x) & 0x1) << 2)
+#define G_030D20_INVALIDATE(x) (((x) >> 2) & 0x1)
+#define C_030D20_INVALIDATE 0xFFFFFFFB
+#define S_030D20_WRITEBACK(x) (((x) & 0x1) << 3)
+#define G_030D20_WRITEBACK(x) (((x) >> 3) & 0x1)
+#define C_030D20_WRITEBACK 0xFFFFFFF7
+#define S_030D20_VOL(x) (((x) & 0x1) << 4)
+#define G_030D20_VOL(x) (((x) >> 4) & 0x1)
+#define C_030D20_VOL 0xFFFFFFEF
+#define S_030D20_COMPLETE(x) (((x) & 0x1) << 16)
+#define G_030D20_COMPLETE(x) (((x) >> 16) & 0x1)
+#define C_030D20_COMPLETE 0xFFFEFFFF
+#define R_030D24_SQC_WRITEBACK 0x030D24
+#define S_030D24_DWB(x) (((x) & 0x1) << 0)
+#define G_030D24_DWB(x) (((x) >> 0) & 0x1)
+#define C_030D24_DWB 0xFFFFFFFE
+#define S_030D24_DIRTY(x) (((x) & 0x1) << 1)
+#define G_030D24_DIRTY(x) (((x) >> 1) & 0x1)
+#define C_030D24_DIRTY 0xFFFFFFFD
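/*
 * A sketch of composing an SQC_CACHES request that targets both the
 * instruction and data caches for invalidation.  COMPLETE reads back as
 * status, so a caller would poll G_030D20_COMPLETE() on the register
 * after writing; that polling sequence is an assumption here, not a
 * documented protocol.
 */
static inline uint32_t sqc_caches_invalidate_all(void)
{
   return S_030D20_TARGET_INST(1) |
          S_030D20_TARGET_DATA(1) |
          S_030D20_INVALIDATE(1);
}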
#define R_008DFC_SQ_VOP1 0x008DFC
#define S_008DFC_SRC0(x) (((x) & 0x1FF) << 0)
#define G_008DFC_SRC0(x) (((x) >> 0) & 0x1FF)
@@ -3740,7 +5000,17 @@
#define C_008DFC_ENCODING 0x03FFFFFF
#define V_008DFC_SQ_ENC_MUBUF_FIELD 0x38
#endif
+#define R_030E00_TA_CS_BC_BASE_ADDR 0x030E00
+#define R_030E04_TA_CS_BC_BASE_ADDR_HI 0x030E04
+#define S_030E04_ADDRESS(x) (((x) & 0xFF) << 0)
+#define G_030E04_ADDRESS(x) (((x) >> 0) & 0xFF)
+#define C_030E04_ADDRESS 0xFFFFFF00
+#define R_030F00_DB_OCCLUSION_COUNT0_LOW 0x030F00
#define R_008F00_SQ_BUF_RSRC_WORD0 0x008F00
+#define R_030F04_DB_OCCLUSION_COUNT0_HI 0x030F04
+#define S_030F04_COUNT_HI(x) (((x) & 0x7FFFFFFF) << 0)
+#define G_030F04_COUNT_HI(x) (((x) >> 0) & 0x7FFFFFFF)
+#define C_030F04_COUNT_HI 0x80000000
#define R_008F04_SQ_BUF_RSRC_WORD1 0x008F04
#define S_008F04_BASE_ADDRESS_HI(x) (((x) & 0xFFFF) << 0)
#define G_008F04_BASE_ADDRESS_HI(x) (((x) >> 0) & 0xFFFF)
@@ -3754,7 +5024,12 @@
#define S_008F04_SWIZZLE_ENABLE(x) (((x) & 0x1) << 31)
#define G_008F04_SWIZZLE_ENABLE(x) (((x) >> 31) & 0x1)
#define C_008F04_SWIZZLE_ENABLE 0x7FFFFFFF
+#define R_030F08_DB_OCCLUSION_COUNT1_LOW 0x030F08
#define R_008F08_SQ_BUF_RSRC_WORD2 0x008F08
+#define R_030F0C_DB_OCCLUSION_COUNT1_HI 0x030F0C
+#define S_030F0C_COUNT_HI(x) (((x) & 0x7FFFFFFF) << 0)
+#define G_030F0C_COUNT_HI(x) (((x) >> 0) & 0x7FFFFFFF)
+#define C_030F0C_COUNT_HI 0x80000000
#define R_008F0C_SQ_BUF_RSRC_WORD3 0x008F0C
#define S_008F0C_DST_SEL_X(x) (((x) & 0x07) << 0)
#define G_008F0C_DST_SEL_X(x) (((x) >> 0) & 0x07)
@@ -3862,7 +5137,12 @@
#define V_008F0C_SQ_RSRC_BUF_RSVD_1 0x01
#define V_008F0C_SQ_RSRC_BUF_RSVD_2 0x02
#define V_008F0C_SQ_RSRC_BUF_RSVD_3 0x03
+#define R_030F10_DB_OCCLUSION_COUNT2_LOW 0x030F10
#define R_008F10_SQ_IMG_RSRC_WORD0 0x008F10
+#define R_030F14_DB_OCCLUSION_COUNT2_HI 0x030F14
+#define S_030F14_COUNT_HI(x) (((x) & 0x7FFFFFFF) << 0)
+#define G_030F14_COUNT_HI(x) (((x) >> 0) & 0x7FFFFFFF)
+#define C_030F14_COUNT_HI 0x80000000
#define R_008F14_SQ_IMG_RSRC_WORD1 0x008F14
#define S_008F14_BASE_ADDRESS_HI(x) (((x) & 0xFF) << 0)
#define G_008F14_BASE_ADDRESS_HI(x) (((x) >> 0) & 0xFF)
@@ -3961,6 +5241,7 @@
#define G_008F14_MTYPE(x) (((x) >> 30) & 0x03)
#define C_008F14_MTYPE 0x3FFFFFFF
/* */
+#define R_030F18_DB_OCCLUSION_COUNT3_LOW 0x030F18
#define R_008F18_SQ_IMG_RSRC_WORD2 0x008F18
#define S_008F18_WIDTH(x) (((x) & 0x3FFF) << 0)
#define G_008F18_WIDTH(x) (((x) >> 0) & 0x3FFF)
@@ -3974,6 +5255,10 @@
#define S_008F18_INTERLACED(x) (((x) & 0x1) << 31)
#define G_008F18_INTERLACED(x) (((x) >> 31) & 0x1)
#define C_008F18_INTERLACED 0x7FFFFFFF
+#define R_030F1C_DB_OCCLUSION_COUNT3_HI 0x030F1C
+#define S_030F1C_COUNT_HI(x) (((x) & 0x7FFFFFFF) << 0)
+#define G_030F1C_COUNT_HI(x) (((x) >> 0) & 0x7FFFFFFF)
+#define C_030F1C_COUNT_HI 0x80000000
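/*
 * The DB occlusion counters are split into a LOW dword plus a HI dword
 * whose COUNT_HI field is only 31 bits wide.  A minimal sketch of
 * assembling the 64-bit count from values read back by the caller; the
 * meaning of the HI word's top bit is not described by this header, so
 * it is masked off via G_030F1C_COUNT_HI().
 */
static inline uint64_t db_occlusion_count64(uint32_t lo, uint32_t hi)
{
   return ((uint64_t)G_030F1C_COUNT_HI(hi) << 32) | lo;
}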
#define R_008F1C_SQ_IMG_RSRC_WORD3 0x008F1C
#define S_008F1C_DST_SEL_X(x) (((x) & 0x07) << 0)
#define G_008F1C_DST_SEL_X(x) (((x) >> 0) & 0x07)
@@ -4084,6 +5369,23 @@
#define G_008F28_LOD_HDW_CNT_EN(x) (((x) >> 20) & 0x1)
#define C_008F28_LOD_HDW_CNT_EN 0xFFEFFFFF
/* */
+/* VI */
+#define S_008F28_COMPRESSION_EN(x) (((x) & 0x1) << 21)
+#define G_008F28_COMPRESSION_EN(x) (((x) >> 21) & 0x1)
+#define C_008F28_COMPRESSION_EN 0xFFDFFFFF
+#define S_008F28_ALPHA_IS_ON_MSB(x) (((x) & 0x1) << 22)
+#define G_008F28_ALPHA_IS_ON_MSB(x) (((x) >> 22) & 0x1)
+#define C_008F28_ALPHA_IS_ON_MSB 0xFFBFFFFF
+#define S_008F28_COLOR_TRANSFORM(x) (((x) & 0x1) << 23)
+#define G_008F28_COLOR_TRANSFORM(x) (((x) >> 23) & 0x1)
+#define C_008F28_COLOR_TRANSFORM 0xFF7FFFFF
+#define S_008F28_LOST_ALPHA_BITS(x) (((x) & 0x0F) << 24)
+#define G_008F28_LOST_ALPHA_BITS(x) (((x) >> 24) & 0x0F)
+#define C_008F28_LOST_ALPHA_BITS 0xF0FFFFFF
+#define S_008F28_LOST_COLOR_BITS(x) (((x) & 0x0F) << 28)
+#define G_008F28_LOST_COLOR_BITS(x) (((x) >> 28) & 0x0F)
+#define C_008F28_LOST_COLOR_BITS 0x0FFFFFFF
+/* */
#define R_008F2C_SQ_IMG_RSRC_WORD7 0x008F2C
#define R_008F30_SQ_IMG_SAMP_WORD0 0x008F30
#define S_008F30_CLAMP_X(x) (((x) & 0x07) << 0)
@@ -4148,6 +5450,11 @@
#define S_008F30_FILTER_MODE(x) (((x) & 0x03) << 29)
#define G_008F30_FILTER_MODE(x) (((x) >> 29) & 0x03)
#define C_008F30_FILTER_MODE 0x9FFFFFFF
+/* VI */
+#define S_008F30_COMPAT_MODE(x) (((x) & 0x1) << 31)
+#define G_008F30_COMPAT_MODE(x) (((x) >> 31) & 0x1)
+#define C_008F30_COMPAT_MODE 0x7FFFFFFF
+/* */
#define R_008F34_SQ_IMG_SAMP_WORD1 0x008F34
#define S_008F34_MIN_LOD(x) (((x) & 0xFFF) << 0)
#define G_008F34_MIN_LOD(x) (((x) >> 0) & 0xFFF)
@@ -4313,6 +5620,11 @@
#define G_008F44_OFFSET(x) (((x) >> 0) & 0xFFFFFF)
#define C_008F44_OFFSET 0xFF000000
/* */
+#define R_030FF8_DB_ZPASS_COUNT_LOW 0x030FF8
+#define R_030FFC_DB_ZPASS_COUNT_HI 0x030FFC
+#define S_030FFC_COUNT_HI(x) (((x) & 0x7FFFFFFF) << 0)
+#define G_030FFC_COUNT_HI(x) (((x) >> 0) & 0x7FFFFFFF)
+#define C_030FFC_COUNT_HI 0x80000000
#define R_009100_SPI_CONFIG_CNTL 0x009100
#define S_009100_GPR_WRITE_PRIORITY(x) (((x) & 0x1FFFFF) << 0)
#define G_009100_GPR_WRITE_PRIORITY(x) (((x) >> 0) & 0x1FFFFF)
@@ -4437,6 +5749,34 @@
#define S_009858_MSAA16_Y(x) (((x) & 0x03) << 18)
#define G_009858_MSAA16_Y(x) (((x) >> 18) & 0x03)
#define C_009858_MSAA16_Y 0xFFF3FFFF
+#define R_0098F8_GB_ADDR_CONFIG 0x0098F8
+#define S_0098F8_NUM_PIPES(x) (((x) & 0x07) << 0)
+#define G_0098F8_NUM_PIPES(x) (((x) >> 0) & 0x07)
+#define C_0098F8_NUM_PIPES 0xFFFFFFF8
+#define S_0098F8_PIPE_INTERLEAVE_SIZE(x) (((x) & 0x07) << 4)
+#define G_0098F8_PIPE_INTERLEAVE_SIZE(x) (((x) >> 4) & 0x07)
+#define C_0098F8_PIPE_INTERLEAVE_SIZE 0xFFFFFF8F
+#define S_0098F8_BANK_INTERLEAVE_SIZE(x) (((x) & 0x07) << 8)
+#define G_0098F8_BANK_INTERLEAVE_SIZE(x) (((x) >> 8) & 0x07)
+#define C_0098F8_BANK_INTERLEAVE_SIZE 0xFFFFF8FF
+#define S_0098F8_NUM_SHADER_ENGINES(x) (((x) & 0x03) << 12)
+#define G_0098F8_NUM_SHADER_ENGINES(x) (((x) >> 12) & 0x03)
+#define C_0098F8_NUM_SHADER_ENGINES 0xFFFFCFFF
+#define S_0098F8_SHADER_ENGINE_TILE_SIZE(x) (((x) & 0x07) << 16)
+#define G_0098F8_SHADER_ENGINE_TILE_SIZE(x) (((x) >> 16) & 0x07)
+#define C_0098F8_SHADER_ENGINE_TILE_SIZE 0xFFF8FFFF
+#define S_0098F8_NUM_GPUS(x) (((x) & 0x07) << 20)
+#define G_0098F8_NUM_GPUS(x) (((x) >> 20) & 0x07)
+#define C_0098F8_NUM_GPUS 0xFF8FFFFF
+#define S_0098F8_MULTI_GPU_TILE_SIZE(x) (((x) & 0x03) << 24)
+#define G_0098F8_MULTI_GPU_TILE_SIZE(x) (((x) >> 24) & 0x03)
+#define C_0098F8_MULTI_GPU_TILE_SIZE 0xFCFFFFFF
+#define S_0098F8_ROW_SIZE(x) (((x) & 0x03) << 28)
+#define G_0098F8_ROW_SIZE(x) (((x) >> 28) & 0x03)
+#define C_0098F8_ROW_SIZE 0xCFFFFFFF
+#define S_0098F8_NUM_LOWER_PIPES(x) (((x) & 0x1) << 30)
+#define G_0098F8_NUM_LOWER_PIPES(x) (((x) >> 30) & 0x1)
+#define C_0098F8_NUM_LOWER_PIPES 0xBFFFFFFF
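/*
 * Sketch of decoding GB_ADDR_CONFIG.  PIPE_INTERLEAVE_SIZE encodes a
 * shift of a 256-byte base, and treating NUM_PIPES as a log2 pipe count
 * follows how addrlib-style tiling code typically interprets this
 * register; both interpretations are assumptions, not guarantees made
 * by this header.
 */
static inline unsigned gb_pipe_interleave_bytes(uint32_t gb_addr_config)
{
   return 256u << G_0098F8_PIPE_INTERLEAVE_SIZE(gb_addr_config);
}

static inline unsigned gb_num_pipes(uint32_t gb_addr_config)
{
   return 1u << G_0098F8_NUM_PIPES(gb_addr_config);
}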
#define R_009910_GB_TILE_MODE0 0x009910
#define S_009910_MICRO_TILE_MODE(x) (((x) & 0x03) << 0)
#define G_009910_MICRO_TILE_MODE(x) (((x) >> 0) & 0x03)
@@ -4515,14 +5855,88 @@
#define V_009910_ADDR_SURF_4_BANK 0x01
#define V_009910_ADDR_SURF_8_BANK 0x02
#define V_009910_ADDR_SURF_16_BANK 0x03
-/* CIK */
#define S_009910_MICRO_TILE_MODE_NEW(x) (((x) & 0x07) << 22)
#define G_009910_MICRO_TILE_MODE_NEW(x) (((x) >> 22) & 0x07)
-#define C_009910_MICRO_TILE_MODE_NEW(x) 0xFE3FFFFF
+#define C_009910_MICRO_TILE_MODE_NEW 0xFE3FFFFF
#define V_009910_ADDR_SURF_DISPLAY_MICRO_TILING 0x00
#define V_009910_ADDR_SURF_THIN_MICRO_TILING 0x01
#define V_009910_ADDR_SURF_DEPTH_MICRO_TILING 0x02
#define V_009910_ADDR_SURF_ROTATED_MICRO_TILING 0x03
+#define S_009910_SAMPLE_SPLIT(x) (((x) & 0x03) << 25)
+#define G_009910_SAMPLE_SPLIT(x) (((x) >> 25) & 0x03)
+#define C_009910_SAMPLE_SPLIT 0xF9FFFFFF
+#define R_009914_GB_TILE_MODE1 0x009914
+#define R_009918_GB_TILE_MODE2 0x009918
+#define R_00991C_GB_TILE_MODE3 0x00991C
+#define R_009920_GB_TILE_MODE4 0x009920
+#define R_009924_GB_TILE_MODE5 0x009924
+#define R_009928_GB_TILE_MODE6 0x009928
+#define R_00992C_GB_TILE_MODE7 0x00992C
+#define R_009930_GB_TILE_MODE8 0x009930
+#define R_009934_GB_TILE_MODE9 0x009934
+#define R_009938_GB_TILE_MODE10 0x009938
+#define R_00993C_GB_TILE_MODE11 0x00993C
+#define R_009940_GB_TILE_MODE12 0x009940
+#define R_009944_GB_TILE_MODE13 0x009944
+#define R_009948_GB_TILE_MODE14 0x009948
+#define R_00994C_GB_TILE_MODE15 0x00994C
+#define R_009950_GB_TILE_MODE16 0x009950
+#define R_009954_GB_TILE_MODE17 0x009954
+#define R_009958_GB_TILE_MODE18 0x009958
+#define R_00995C_GB_TILE_MODE19 0x00995C
+#define R_009960_GB_TILE_MODE20 0x009960
+#define R_009964_GB_TILE_MODE21 0x009964
+#define R_009968_GB_TILE_MODE22 0x009968
+#define R_00996C_GB_TILE_MODE23 0x00996C
+#define R_009970_GB_TILE_MODE24 0x009970
+#define R_009974_GB_TILE_MODE25 0x009974
+#define R_009978_GB_TILE_MODE26 0x009978
+#define R_00997C_GB_TILE_MODE27 0x00997C
+#define R_009980_GB_TILE_MODE28 0x009980
+#define R_009984_GB_TILE_MODE29 0x009984
+#define R_009988_GB_TILE_MODE30 0x009988
+#define R_00998C_GB_TILE_MODE31 0x00998C
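/*
 * GB_TILE_MODE0..31 are contiguous dwords, so tile-mode index i is more
 * conveniently addressed as base + 4 * i than through the 32 individual
 * macros.  A small illustrative helper:
 */
static inline uint32_t gb_tile_mode_reg(unsigned i)
{
   return R_009910_GB_TILE_MODE0 + 4 * i; /* valid for i in [0, 31] */
}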
+/* CIK */
+#define R_009990_GB_MACROTILE_MODE0 0x009990
+#define S_009990_BANK_WIDTH(x) (((x) & 0x03) << 0)
+#define G_009990_BANK_WIDTH(x) (((x) >> 0) & 0x03)
+#define C_009990_BANK_WIDTH 0xFFFFFFFC
+#define S_009990_BANK_HEIGHT(x) (((x) & 0x03) << 2)
+#define G_009990_BANK_HEIGHT(x) (((x) >> 2) & 0x03)
+#define C_009990_BANK_HEIGHT 0xFFFFFFF3
+#define S_009990_MACRO_TILE_ASPECT(x) (((x) & 0x03) << 4)
+#define G_009990_MACRO_TILE_ASPECT(x) (((x) >> 4) & 0x03)
+#define C_009990_MACRO_TILE_ASPECT 0xFFFFFFCF
+#define S_009990_NUM_BANKS(x) (((x) & 0x03) << 6)
+#define G_009990_NUM_BANKS(x) (((x) >> 6) & 0x03)
+#define C_009990_NUM_BANKS 0xFFFFFF3F
+#define R_009994_GB_MACROTILE_MODE1 0x009994
+#define R_009998_GB_MACROTILE_MODE2 0x009998
+#define R_00999C_GB_MACROTILE_MODE3 0x00999C
+#define R_0099A0_GB_MACROTILE_MODE4 0x0099A0
+#define R_0099A4_GB_MACROTILE_MODE5 0x0099A4
+#define R_0099A8_GB_MACROTILE_MODE6 0x0099A8
+#define R_0099AC_GB_MACROTILE_MODE7 0x0099AC
+#define R_0099B0_GB_MACROTILE_MODE8 0x0099B0
+#define R_0099B4_GB_MACROTILE_MODE9 0x0099B4
+#define R_0099B8_GB_MACROTILE_MODE10 0x0099B8
+#define R_0099BC_GB_MACROTILE_MODE11 0x0099BC
+#define R_0099C0_GB_MACROTILE_MODE12 0x0099C0
+#define R_0099C4_GB_MACROTILE_MODE13 0x0099C4
+#define R_0099C8_GB_MACROTILE_MODE14 0x0099C8
+#define R_0099CC_GB_MACROTILE_MODE15 0x0099CC
+/* */
+#define R_00B000_SPI_SHADER_TBA_LO_PS 0x00B000
+#define R_00B004_SPI_SHADER_TBA_HI_PS 0x00B004
+#define S_00B004_MEM_BASE(x) (((x) & 0xFF) << 0)
+#define G_00B004_MEM_BASE(x) (((x) >> 0) & 0xFF)
+#define C_00B004_MEM_BASE 0xFFFFFF00
+#define R_00B008_SPI_SHADER_TMA_LO_PS 0x00B008
+#define R_00B00C_SPI_SHADER_TMA_HI_PS 0x00B00C
+#define S_00B00C_MEM_BASE(x) (((x) & 0xFF) << 0)
+#define G_00B00C_MEM_BASE(x) (((x) >> 0) & 0xFF)
+#define C_00B00C_MEM_BASE 0xFFFFFF00
+/* CIK */
#define R_00B01C_SPI_SHADER_PGM_RSRC3_PS 0x00B01C
#define S_00B01C_CU_EN(x) (((x) & 0xFFFF) << 0)
#define G_00B01C_CU_EN(x) (((x) >> 0) & 0xFFFF)
@@ -4582,6 +5996,9 @@
#define S_00B02C_USER_SGPR(x) (((x) & 0x1F) << 1)
#define G_00B02C_USER_SGPR(x) (((x) >> 1) & 0x1F)
#define C_00B02C_USER_SGPR 0xFFFFFFC1
+#define S_00B02C_TRAP_PRESENT(x) (((x) & 0x1) << 6)
+#define G_00B02C_TRAP_PRESENT(x) (((x) >> 6) & 0x1)
+#define C_00B02C_TRAP_PRESENT 0xFFFFFFBF
#define S_00B02C_WAVE_CNT_EN(x) (((x) & 0x1) << 7)
#define G_00B02C_WAVE_CNT_EN(x) (((x) >> 7) & 0x1)
#define C_00B02C_WAVE_CNT_EN 0xFFFFFF7F
@@ -4591,6 +6008,9 @@
#define S_00B02C_EXCP_EN(x) (((x) & 0x7F) << 16) /* mask is 0x1FF on CIK */
#define G_00B02C_EXCP_EN(x) (((x) >> 16) & 0x7F) /* mask is 0x1FF on CIK */
#define C_00B02C_EXCP_EN 0xFF80FFFF /* mask is 0x1FF on CIK */
+#define S_00B02C_EXCP_EN_CIK(x) (((x) & 0x1FF) << 16)
+#define G_00B02C_EXCP_EN_CIK(x) (((x) >> 16) & 0x1FF)
+#define C_00B02C_EXCP_EN_CIK 0xFE00FFFF
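/*
 * EXCP_EN is a 7-bit field on SI but widens to 9 bits on CIK, hence the
 * parallel EXCP_EN_CIK macros.  Sketch of selecting the right encoder
 * per generation; "is_cik" stands in for whatever chip-class check the
 * caller already has.
 */
#include <stdbool.h>

static inline uint32_t ps_rsrc2_excp_en(uint32_t excp_mask, bool is_cik)
{
   return is_cik ? S_00B02C_EXCP_EN_CIK(excp_mask)
                 : S_00B02C_EXCP_EN(excp_mask);
}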
#define R_00B030_SPI_SHADER_USER_DATA_PS_0 0x00B030
#define R_00B034_SPI_SHADER_USER_DATA_PS_1 0x00B034
#define R_00B038_SPI_SHADER_USER_DATA_PS_2 0x00B038
@@ -4607,6 +6027,16 @@
#define R_00B064_SPI_SHADER_USER_DATA_PS_13 0x00B064
#define R_00B068_SPI_SHADER_USER_DATA_PS_14 0x00B068
#define R_00B06C_SPI_SHADER_USER_DATA_PS_15 0x00B06C
+#define R_00B100_SPI_SHADER_TBA_LO_VS 0x00B100
+#define R_00B104_SPI_SHADER_TBA_HI_VS 0x00B104
+#define S_00B104_MEM_BASE(x) (((x) & 0xFF) << 0)
+#define G_00B104_MEM_BASE(x) (((x) >> 0) & 0xFF)
+#define C_00B104_MEM_BASE 0xFFFFFF00
+#define R_00B108_SPI_SHADER_TMA_LO_VS 0x00B108
+#define R_00B10C_SPI_SHADER_TMA_HI_VS 0x00B10C
+#define S_00B10C_MEM_BASE(x) (((x) & 0xFF) << 0)
+#define G_00B10C_MEM_BASE(x) (((x) >> 0) & 0xFF)
+#define C_00B10C_MEM_BASE 0xFFFFFF00
/* CIK */
#define R_00B118_SPI_SHADER_PGM_RSRC3_VS 0x00B118
#define S_00B118_CU_EN(x) (((x) & 0xFFFF) << 0)
@@ -4674,6 +6104,9 @@
#define S_00B12C_USER_SGPR(x) (((x) & 0x1F) << 1)
#define G_00B12C_USER_SGPR(x) (((x) >> 1) & 0x1F)
#define C_00B12C_USER_SGPR 0xFFFFFFC1
+#define S_00B12C_TRAP_PRESENT(x) (((x) & 0x1) << 6)
+#define G_00B12C_TRAP_PRESENT(x) (((x) >> 6) & 0x1)
+#define C_00B12C_TRAP_PRESENT 0xFFFFFFBF
#define S_00B12C_OC_LDS_EN(x) (((x) & 0x1) << 7)
#define G_00B12C_OC_LDS_EN(x) (((x) >> 7) & 0x1)
#define C_00B12C_OC_LDS_EN 0xFFFFFF7F
@@ -4695,6 +6128,14 @@
#define S_00B12C_EXCP_EN(x) (((x) & 0x7F) << 13) /* mask is 0x1FF on CIK */
#define G_00B12C_EXCP_EN(x) (((x) >> 13) & 0x7F) /* mask is 0x1FF on CIK */
#define C_00B12C_EXCP_EN 0xFFF01FFF /* mask is 0x1FF on CIK */
+#define S_00B12C_EXCP_EN_CIK(x) (((x) & 0x1FF) << 13)
+#define G_00B12C_EXCP_EN_CIK(x) (((x) >> 13) & 0x1FF)
+#define C_00B12C_EXCP_EN_CIK 0xFFC01FFF
+/* VI */
+#define S_00B12C_DISPATCH_DRAW_EN(x) (((x) & 0x1) << 24)
+#define G_00B12C_DISPATCH_DRAW_EN(x) (((x) >> 24) & 0x1)
+#define C_00B12C_DISPATCH_DRAW_EN 0xFEFFFFFF
+/* */
#define R_00B130_SPI_SHADER_USER_DATA_VS_0 0x00B130
#define R_00B134_SPI_SHADER_USER_DATA_VS_1 0x00B134
#define R_00B138_SPI_SHADER_USER_DATA_VS_2 0x00B138
@@ -4711,6 +6152,16 @@
#define R_00B164_SPI_SHADER_USER_DATA_VS_13 0x00B164
#define R_00B168_SPI_SHADER_USER_DATA_VS_14 0x00B168
#define R_00B16C_SPI_SHADER_USER_DATA_VS_15 0x00B16C
+#define R_00B200_SPI_SHADER_TBA_LO_GS 0x00B200
+#define R_00B204_SPI_SHADER_TBA_HI_GS 0x00B204
+#define S_00B204_MEM_BASE(x) (((x) & 0xFF) << 0)
+#define G_00B204_MEM_BASE(x) (((x) >> 0) & 0xFF)
+#define C_00B204_MEM_BASE 0xFFFFFF00
+#define R_00B208_SPI_SHADER_TMA_LO_GS 0x00B208
+#define R_00B20C_SPI_SHADER_TMA_HI_GS 0x00B20C
+#define S_00B20C_MEM_BASE(x) (((x) & 0xFF) << 0)
+#define G_00B20C_MEM_BASE(x) (((x) >> 0) & 0xFF)
+#define C_00B20C_MEM_BASE 0xFFFFFF00
/* CIK */
#define R_00B21C_SPI_SHADER_PGM_RSRC3_GS 0x00B21C
#define S_00B21C_CU_EN(x) (((x) & 0xFFFF) << 0)
@@ -4723,6 +6174,11 @@
#define G_00B21C_LOCK_LOW_THRESHOLD(x) (((x) >> 22) & 0x0F)
#define C_00B21C_LOCK_LOW_THRESHOLD 0xFC3FFFFF
/* */
+/* VI */
+#define S_00B21C_GROUP_FIFO_DEPTH(x) (((x) & 0x3F) << 26)
+#define G_00B21C_GROUP_FIFO_DEPTH(x) (((x) >> 26) & 0x3F)
+#define C_00B21C_GROUP_FIFO_DEPTH 0x03FFFFFF
+/* */
#define R_00B220_SPI_SHADER_PGM_LO_GS 0x00B220
#define R_00B224_SPI_SHADER_PGM_HI_GS 0x00B224
#define S_00B224_MEM_BASE(x) (((x) & 0xFF) << 0)
@@ -4771,10 +6227,41 @@
#define S_00B22C_USER_SGPR(x) (((x) & 0x1F) << 1)
#define G_00B22C_USER_SGPR(x) (((x) >> 1) & 0x1F)
#define C_00B22C_USER_SGPR 0xFFFFFFC1
+#define S_00B22C_TRAP_PRESENT(x) (((x) & 0x1) << 6)
+#define G_00B22C_TRAP_PRESENT(x) (((x) >> 6) & 0x1)
+#define C_00B22C_TRAP_PRESENT 0xFFFFFFBF
#define S_00B22C_EXCP_EN(x) (((x) & 0x7F) << 7) /* mask is 0x1FF on CIK */
#define G_00B22C_EXCP_EN(x) (((x) >> 7) & 0x7F) /* mask is 0x1FF on CIK */
#define C_00B22C_EXCP_EN 0xFFFFC07F /* mask is 0x1FF on CIK */
+#define S_00B22C_EXCP_EN_CIK(x) (((x) & 0x1FF) << 7)
+#define G_00B22C_EXCP_EN_CIK(x) (((x) >> 7) & 0x1FF)
+#define C_00B22C_EXCP_EN_CIK 0xFFFF007F
#define R_00B230_SPI_SHADER_USER_DATA_GS_0 0x00B230
+#define R_00B234_SPI_SHADER_USER_DATA_GS_1 0x00B234
+#define R_00B238_SPI_SHADER_USER_DATA_GS_2 0x00B238
+#define R_00B23C_SPI_SHADER_USER_DATA_GS_3 0x00B23C
+#define R_00B240_SPI_SHADER_USER_DATA_GS_4 0x00B240
+#define R_00B244_SPI_SHADER_USER_DATA_GS_5 0x00B244
+#define R_00B248_SPI_SHADER_USER_DATA_GS_6 0x00B248
+#define R_00B24C_SPI_SHADER_USER_DATA_GS_7 0x00B24C
+#define R_00B250_SPI_SHADER_USER_DATA_GS_8 0x00B250
+#define R_00B254_SPI_SHADER_USER_DATA_GS_9 0x00B254
+#define R_00B258_SPI_SHADER_USER_DATA_GS_10 0x00B258
+#define R_00B25C_SPI_SHADER_USER_DATA_GS_11 0x00B25C
+#define R_00B260_SPI_SHADER_USER_DATA_GS_12 0x00B260
+#define R_00B264_SPI_SHADER_USER_DATA_GS_13 0x00B264
+#define R_00B268_SPI_SHADER_USER_DATA_GS_14 0x00B268
+#define R_00B26C_SPI_SHADER_USER_DATA_GS_15 0x00B26C
+#define R_00B300_SPI_SHADER_TBA_LO_ES 0x00B300
+#define R_00B304_SPI_SHADER_TBA_HI_ES 0x00B304
+#define S_00B304_MEM_BASE(x) (((x) & 0xFF) << 0)
+#define G_00B304_MEM_BASE(x) (((x) >> 0) & 0xFF)
+#define C_00B304_MEM_BASE 0xFFFFFF00
+#define R_00B308_SPI_SHADER_TMA_LO_ES 0x00B308
+#define R_00B30C_SPI_SHADER_TMA_HI_ES 0x00B30C
+#define S_00B30C_MEM_BASE(x) (((x) & 0xFF) << 0)
+#define G_00B30C_MEM_BASE(x) (((x) >> 0) & 0xFF)
+#define C_00B30C_MEM_BASE 0xFFFFFF00
/* CIK */
#define R_00B31C_SPI_SHADER_PGM_RSRC3_ES 0x00B31C
#define S_00B31C_CU_EN(x) (((x) & 0xFFFF) << 0)
@@ -4787,6 +6274,11 @@
#define G_00B31C_LOCK_LOW_THRESHOLD(x) (((x) >> 22) & 0x0F)
#define C_00B31C_LOCK_LOW_THRESHOLD 0xFC3FFFFF
/* */
+/* VI */
+#define S_00B31C_GROUP_FIFO_DEPTH(x) (((x) & 0x3F) << 26)
+#define G_00B31C_GROUP_FIFO_DEPTH(x) (((x) >> 26) & 0x3F)
+#define C_00B31C_GROUP_FIFO_DEPTH 0x03FFFFFF
+/* */
#define R_00B320_SPI_SHADER_PGM_LO_ES 0x00B320
#define R_00B324_SPI_SHADER_PGM_HI_ES 0x00B324
#define S_00B324_MEM_BASE(x) (((x) & 0xFF) << 0)
@@ -4838,6 +6330,9 @@
#define S_00B32C_USER_SGPR(x) (((x) & 0x1F) << 1)
#define G_00B32C_USER_SGPR(x) (((x) >> 1) & 0x1F)
#define C_00B32C_USER_SGPR 0xFFFFFFC1
+#define S_00B32C_TRAP_PRESENT(x) (((x) & 0x1) << 6)
+#define G_00B32C_TRAP_PRESENT(x) (((x) >> 6) & 0x1)
+#define C_00B32C_TRAP_PRESENT 0xFFFFFFBF
#define S_00B32C_OC_LDS_EN(x) (((x) & 0x1) << 7)
#define G_00B32C_OC_LDS_EN(x) (((x) >> 7) & 0x1)
#define C_00B32C_OC_LDS_EN 0xFFFFFF7F
@@ -4848,6 +6343,31 @@
#define G_00B32C_LDS_SIZE(x) (((x) >> 20) & 0x1FF) /* CIK, for on-chip GS */
#define C_00B32C_LDS_SIZE 0xE00FFFFF /* CIK, for on-chip GS */
#define R_00B330_SPI_SHADER_USER_DATA_ES_0 0x00B330
+#define R_00B334_SPI_SHADER_USER_DATA_ES_1 0x00B334
+#define R_00B338_SPI_SHADER_USER_DATA_ES_2 0x00B338
+#define R_00B33C_SPI_SHADER_USER_DATA_ES_3 0x00B33C
+#define R_00B340_SPI_SHADER_USER_DATA_ES_4 0x00B340
+#define R_00B344_SPI_SHADER_USER_DATA_ES_5 0x00B344
+#define R_00B348_SPI_SHADER_USER_DATA_ES_6 0x00B348
+#define R_00B34C_SPI_SHADER_USER_DATA_ES_7 0x00B34C
+#define R_00B350_SPI_SHADER_USER_DATA_ES_8 0x00B350
+#define R_00B354_SPI_SHADER_USER_DATA_ES_9 0x00B354
+#define R_00B358_SPI_SHADER_USER_DATA_ES_10 0x00B358
+#define R_00B35C_SPI_SHADER_USER_DATA_ES_11 0x00B35C
+#define R_00B360_SPI_SHADER_USER_DATA_ES_12 0x00B360
+#define R_00B364_SPI_SHADER_USER_DATA_ES_13 0x00B364
+#define R_00B368_SPI_SHADER_USER_DATA_ES_14 0x00B368
+#define R_00B36C_SPI_SHADER_USER_DATA_ES_15 0x00B36C
+#define R_00B400_SPI_SHADER_TBA_LO_HS 0x00B400
+#define R_00B404_SPI_SHADER_TBA_HI_HS 0x00B404
+#define S_00B404_MEM_BASE(x) (((x) & 0xFF) << 0)
+#define G_00B404_MEM_BASE(x) (((x) >> 0) & 0xFF)
+#define C_00B404_MEM_BASE 0xFFFFFF00
+#define R_00B408_SPI_SHADER_TMA_LO_HS 0x00B408
+#define R_00B40C_SPI_SHADER_TMA_HI_HS 0x00B40C
+#define S_00B40C_MEM_BASE(x) (((x) & 0xFF) << 0)
+#define G_00B40C_MEM_BASE(x) (((x) >> 0) & 0xFF)
+#define C_00B40C_MEM_BASE 0xFFFFFF00
/* CIK */
#define R_00B41C_SPI_SHADER_PGM_RSRC3_HS 0x00B41C
#define S_00B41C_WAVE_LIMIT(x) (((x) & 0x3F) << 0)
@@ -4857,6 +6377,11 @@
#define G_00B41C_LOCK_LOW_THRESHOLD(x) (((x) >> 6) & 0x0F)
#define C_00B41C_LOCK_LOW_THRESHOLD 0xFFFFFC3F
/* */
+/* VI */
+#define S_00B41C_GROUP_FIFO_DEPTH(x) (((x) & 0x3F) << 10)
+#define G_00B41C_GROUP_FIFO_DEPTH(x) (((x) >> 10) & 0x3F)
+#define C_00B41C_GROUP_FIFO_DEPTH 0xFFFF03FF
+/* */
#define R_00B420_SPI_SHADER_PGM_LO_HS 0x00B420
#define R_00B424_SPI_SHADER_PGM_HI_HS 0x00B424
#define S_00B424_MEM_BASE(x) (((x) & 0xFF) << 0)
@@ -4902,6 +6427,9 @@
#define S_00B42C_USER_SGPR(x) (((x) & 0x1F) << 1)
#define G_00B42C_USER_SGPR(x) (((x) >> 1) & 0x1F)
#define C_00B42C_USER_SGPR 0xFFFFFFC1
+#define S_00B42C_TRAP_PRESENT(x) (((x) & 0x1) << 6)
+#define G_00B42C_TRAP_PRESENT(x) (((x) >> 6) & 0x1)
+#define C_00B42C_TRAP_PRESENT 0xFFFFFFBF
#define S_00B42C_OC_LDS_EN(x) (((x) & 0x1) << 7)
#define G_00B42C_OC_LDS_EN(x) (((x) >> 7) & 0x1)
#define C_00B42C_OC_LDS_EN 0xFFFFFF7F
@@ -4912,6 +6440,31 @@
#define G_00B42C_EXCP_EN(x) (((x) >> 9) & 0x7F) /* mask is 0x1FF on CIK */
#define C_00B42C_EXCP_EN 0xFFFF01FF /* mask is 0x1FF on CIK */
#define R_00B430_SPI_SHADER_USER_DATA_HS_0 0x00B430
+#define R_00B434_SPI_SHADER_USER_DATA_HS_1 0x00B434
+#define R_00B438_SPI_SHADER_USER_DATA_HS_2 0x00B438
+#define R_00B43C_SPI_SHADER_USER_DATA_HS_3 0x00B43C
+#define R_00B440_SPI_SHADER_USER_DATA_HS_4 0x00B440
+#define R_00B444_SPI_SHADER_USER_DATA_HS_5 0x00B444
+#define R_00B448_SPI_SHADER_USER_DATA_HS_6 0x00B448
+#define R_00B44C_SPI_SHADER_USER_DATA_HS_7 0x00B44C
+#define R_00B450_SPI_SHADER_USER_DATA_HS_8 0x00B450
+#define R_00B454_SPI_SHADER_USER_DATA_HS_9 0x00B454
+#define R_00B458_SPI_SHADER_USER_DATA_HS_10 0x00B458
+#define R_00B45C_SPI_SHADER_USER_DATA_HS_11 0x00B45C
+#define R_00B460_SPI_SHADER_USER_DATA_HS_12 0x00B460
+#define R_00B464_SPI_SHADER_USER_DATA_HS_13 0x00B464
+#define R_00B468_SPI_SHADER_USER_DATA_HS_14 0x00B468
+#define R_00B46C_SPI_SHADER_USER_DATA_HS_15 0x00B46C
+#define R_00B500_SPI_SHADER_TBA_LO_LS 0x00B500
+#define R_00B504_SPI_SHADER_TBA_HI_LS 0x00B504
+#define S_00B504_MEM_BASE(x) (((x) & 0xFF) << 0)
+#define G_00B504_MEM_BASE(x) (((x) >> 0) & 0xFF)
+#define C_00B504_MEM_BASE 0xFFFFFF00
+#define R_00B508_SPI_SHADER_TMA_LO_LS 0x00B508
+#define R_00B50C_SPI_SHADER_TMA_HI_LS 0x00B50C
+#define S_00B50C_MEM_BASE(x) (((x) & 0xFF) << 0)
+#define G_00B50C_MEM_BASE(x) (((x) >> 0) & 0xFF)
+#define C_00B50C_MEM_BASE 0xFFFFFF00
/* CIK */
#define R_00B51C_SPI_SHADER_PGM_RSRC3_LS 0x00B51C
#define S_00B51C_CU_EN(x) (((x) & 0xFFFF) << 0)
@@ -4924,6 +6477,11 @@
#define G_00B51C_LOCK_LOW_THRESHOLD(x) (((x) >> 22) & 0x0F)
#define C_00B51C_LOCK_LOW_THRESHOLD 0xFC3FFFFF
/* */
+/* VI */
+#define S_00B51C_GROUP_FIFO_DEPTH(x) (((x) & 0x3F) << 26)
+#define G_00B51C_GROUP_FIFO_DEPTH(x) (((x) >> 26) & 0x3F)
+#define C_00B51C_GROUP_FIFO_DEPTH 0x03FFFFFF
+/* */
#define R_00B520_SPI_SHADER_PGM_LO_LS 0x00B520
#define R_00B524_SPI_SHADER_PGM_HI_LS 0x00B524
#define S_00B524_MEM_BASE(x) (((x) & 0xFF) << 0)
@@ -4972,6 +6530,9 @@
#define S_00B52C_USER_SGPR(x) (((x) & 0x1F) << 1)
#define G_00B52C_USER_SGPR(x) (((x) >> 1) & 0x1F)
#define C_00B52C_USER_SGPR 0xFFFFFFC1
+#define S_00B52C_TRAP_PRESENT(x) (((x) & 0x1) << 6)
+#define G_00B52C_TRAP_PRESENT(x) (((x) >> 6) & 0x1)
+#define C_00B52C_TRAP_PRESENT 0xFFFFFFBF
#define S_00B52C_LDS_SIZE(x) (((x) & 0x1FF) << 7)
#define G_00B52C_LDS_SIZE(x) (((x) >> 7) & 0x1FF)
#define C_00B52C_LDS_SIZE 0xFFFF007F
@@ -4979,6 +6540,21 @@
#define G_00B52C_EXCP_EN(x) (((x) >> 16) & 0x7F) /* mask is 0x1FF on CIK */
#define C_00B52C_EXCP_EN 0xFF80FFFF /* mask is 0x1FF on CIK */
#define R_00B530_SPI_SHADER_USER_DATA_LS_0 0x00B530
+#define R_00B534_SPI_SHADER_USER_DATA_LS_1 0x00B534
+#define R_00B538_SPI_SHADER_USER_DATA_LS_2 0x00B538
+#define R_00B53C_SPI_SHADER_USER_DATA_LS_3 0x00B53C
+#define R_00B540_SPI_SHADER_USER_DATA_LS_4 0x00B540
+#define R_00B544_SPI_SHADER_USER_DATA_LS_5 0x00B544
+#define R_00B548_SPI_SHADER_USER_DATA_LS_6 0x00B548
+#define R_00B54C_SPI_SHADER_USER_DATA_LS_7 0x00B54C
+#define R_00B550_SPI_SHADER_USER_DATA_LS_8 0x00B550
+#define R_00B554_SPI_SHADER_USER_DATA_LS_9 0x00B554
+#define R_00B558_SPI_SHADER_USER_DATA_LS_10 0x00B558
+#define R_00B55C_SPI_SHADER_USER_DATA_LS_11 0x00B55C
+#define R_00B560_SPI_SHADER_USER_DATA_LS_12 0x00B560
+#define R_00B564_SPI_SHADER_USER_DATA_LS_13 0x00B564
+#define R_00B568_SPI_SHADER_USER_DATA_LS_14 0x00B568
+#define R_00B56C_SPI_SHADER_USER_DATA_LS_15 0x00B56C
#define R_00B800_COMPUTE_DISPATCH_INITIATOR 0x00B800
#define S_00B800_COMPUTE_SHADER_EN(x) (((x) & 0x1) << 0)
#define G_00B800_COMPUTE_SHADER_EN(x) (((x) >> 0) & 0x1)
@@ -5049,6 +6625,16 @@
#define S_00B82C_MAX_WAVE_ID(x) (((x) & 0xFFF) << 0)
#define G_00B82C_MAX_WAVE_ID(x) (((x) >> 0) & 0xFFF)
#define C_00B82C_MAX_WAVE_ID 0xFFFFF000
+/* CIK */
+#define R_00B828_COMPUTE_PIPELINESTAT_ENABLE 0x00B828
+#define S_00B828_PIPELINESTAT_ENABLE(x) (((x) & 0x1) << 0)
+#define G_00B828_PIPELINESTAT_ENABLE(x) (((x) >> 0) & 0x1)
+#define C_00B828_PIPELINESTAT_ENABLE 0xFFFFFFFE
+#define R_00B82C_COMPUTE_PERFCOUNT_ENABLE 0x00B82C
+#define S_00B82C_PERFCOUNT_ENABLE(x) (((x) & 0x1) << 0)
+#define G_00B82C_PERFCOUNT_ENABLE(x) (((x) >> 0) & 0x1)
+#define C_00B82C_PERFCOUNT_ENABLE 0xFFFFFFFE
+/* */
#define R_00B830_COMPUTE_PGM_LO 0x00B830
#define R_00B834_COMPUTE_PGM_HI 0x00B834
#define S_00B834_DATA(x) (((x) & 0xFF) << 0)
@@ -5059,6 +6645,16 @@
#define G_00B834_INST_ATC(x) (((x) >> 8) & 0x1)
#define C_00B834_INST_ATC 0xFFFFFEFF
/* */
+#define R_00B838_COMPUTE_TBA_LO 0x00B838
+#define R_00B83C_COMPUTE_TBA_HI 0x00B83C
+#define S_00B83C_DATA(x) (((x) & 0xFF) << 0)
+#define G_00B83C_DATA(x) (((x) >> 0) & 0xFF)
+#define C_00B83C_DATA 0xFFFFFF00
+#define R_00B840_COMPUTE_TMA_LO 0x00B840
+#define R_00B844_COMPUTE_TMA_HI 0x00B844
+#define S_00B844_DATA(x) (((x) & 0xFF) << 0)
+#define G_00B844_DATA(x) (((x) >> 0) & 0xFF)
+#define C_00B844_DATA 0xFFFFFF00
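/*
 * The trap base/memory addresses (TBA/TMA) are programmed as a full LO
 * dword plus an 8-bit HI field, matching the usual LO/HI split for
 * shader addresses: LO takes bits [39:8] of a 256-byte-aligned virtual
 * address and HI takes bits [47:40].  The alignment requirement is an
 * assumption carried over from the analogous PGM_LO/HI registers.
 */
static inline void compute_tba_words(uint64_t va, uint32_t *lo, uint32_t *hi)
{
   *lo = (uint32_t)(va >> 8);
   *hi = S_00B83C_DATA(va >> 40);
}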
#define R_00B848_COMPUTE_PGM_RSRC1 0x00B848
#define S_00B848_VGPRS(x) (((x) & 0x3F) << 0)
#define G_00B848_VGPRS(x) (((x) >> 0) & 0x3F)
@@ -5099,6 +6695,9 @@
#define S_00B84C_USER_SGPR(x) (((x) & 0x1F) << 1)
#define G_00B84C_USER_SGPR(x) (((x) >> 1) & 0x1F)
#define C_00B84C_USER_SGPR 0xFFFFFFC1
+#define S_00B84C_TRAP_PRESENT(x) (((x) & 0x1) << 6)
+#define G_00B84C_TRAP_PRESENT(x) (((x) >> 6) & 0x1)
+#define C_00B84C_TRAP_PRESENT 0xFFFFFFBF
#define S_00B84C_TGID_X_EN(x) (((x) & 0x1) << 7)
#define G_00B84C_TGID_X_EN(x) (((x) >> 7) & 0x1)
#define C_00B84C_TGID_X_EN 0xFFFFFF7F
@@ -5125,6 +6724,10 @@
#define S_00B84C_EXCP_EN(x) (((x) & 0x7F) << 24)
#define G_00B84C_EXCP_EN(x) (((x) >> 24) & 0x7F)
#define C_00B84C_EXCP_EN 0x80FFFFFF
+#define R_00B850_COMPUTE_VMID 0x00B850
+#define S_00B850_DATA(x) (((x) & 0x0F) << 0)
+#define G_00B850_DATA(x) (((x) >> 0) & 0x0F)
+#define C_00B850_DATA 0xFFFFFFF0
#define R_00B854_COMPUTE_RESOURCE_LIMITS 0x00B854
#define S_00B854_WAVES_PER_SH(x) (((x) & 0x3F) << 0) /* mask is 0x3FF on CIK */
#define G_00B854_WAVES_PER_SH(x) (((x) >> 0) & 0x3F) /* mask is 0x3FF on CIK */
@@ -5167,7 +6770,84 @@
#define S_00B860_WAVESIZE(x) (((x) & 0x1FFF) << 12)
#define G_00B860_WAVESIZE(x) (((x) >> 12) & 0x1FFF)
#define C_00B860_WAVESIZE 0xFE000FFF
+/* CIK */
+#define R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2 0x00B864
+#define S_00B864_SH0_CU_EN(x) (((x) & 0xFFFF) << 0)
+#define G_00B864_SH0_CU_EN(x) (((x) >> 0) & 0xFFFF)
+#define C_00B864_SH0_CU_EN 0xFFFF0000
+#define S_00B864_SH1_CU_EN(x) (((x) & 0xFFFF) << 16)
+#define G_00B864_SH1_CU_EN(x) (((x) >> 16) & 0xFFFF)
+#define C_00B864_SH1_CU_EN 0x0000FFFF
+#define R_00B868_COMPUTE_STATIC_THREAD_MGMT_SE3 0x00B868
+#define S_00B868_SH0_CU_EN(x) (((x) & 0xFFFF) << 0)
+#define G_00B868_SH0_CU_EN(x) (((x) >> 0) & 0xFFFF)
+#define C_00B868_SH0_CU_EN 0xFFFF0000
+#define S_00B868_SH1_CU_EN(x) (((x) & 0xFFFF) << 16)
+#define G_00B868_SH1_CU_EN(x) (((x) >> 16) & 0xFFFF)
+#define C_00B868_SH1_CU_EN 0x0000FFFF
+#define R_00B86C_COMPUTE_RESTART_X 0x00B86C
+#define R_00B870_COMPUTE_RESTART_Y 0x00B870
+#define R_00B874_COMPUTE_RESTART_Z 0x00B874
+#define R_00B87C_COMPUTE_MISC_RESERVED 0x00B87C
+#define S_00B87C_SEND_SEID(x) (((x) & 0x03) << 0)
+#define G_00B87C_SEND_SEID(x) (((x) >> 0) & 0x03)
+#define C_00B87C_SEND_SEID 0xFFFFFFFC
+#define S_00B87C_RESERVED2(x) (((x) & 0x1) << 2)
+#define G_00B87C_RESERVED2(x) (((x) >> 2) & 0x1)
+#define C_00B87C_RESERVED2 0xFFFFFFFB
+#define S_00B87C_RESERVED3(x) (((x) & 0x1) << 3)
+#define G_00B87C_RESERVED3(x) (((x) >> 3) & 0x1)
+#define C_00B87C_RESERVED3 0xFFFFFFF7
+#define S_00B87C_RESERVED4(x) (((x) & 0x1) << 4)
+#define G_00B87C_RESERVED4(x) (((x) >> 4) & 0x1)
+#define C_00B87C_RESERVED4 0xFFFFFFEF
+/* VI */
+#define S_00B87C_WAVE_ID_BASE(x) (((x) & 0xFFF) << 5)
+#define G_00B87C_WAVE_ID_BASE(x) (((x) >> 5) & 0xFFF)
+#define C_00B87C_WAVE_ID_BASE 0xFFFE001F
+#define R_00B880_COMPUTE_DISPATCH_ID 0x00B880
+#define R_00B884_COMPUTE_THREADGROUP_ID 0x00B884
+#define R_00B888_COMPUTE_RELAUNCH 0x00B888
+#define S_00B888_PAYLOAD(x) (((x) & 0x3FFFFFFF) << 0)
+#define G_00B888_PAYLOAD(x) (((x) >> 0) & 0x3FFFFFFF)
+#define C_00B888_PAYLOAD 0xC0000000
+#define S_00B888_IS_EVENT(x) (((x) & 0x1) << 30)
+#define G_00B888_IS_EVENT(x) (((x) >> 30) & 0x1)
+#define C_00B888_IS_EVENT 0xBFFFFFFF
+#define S_00B888_IS_STATE(x) (((x) & 0x1) << 31)
+#define G_00B888_IS_STATE(x) (((x) >> 31) & 0x1)
+#define C_00B888_IS_STATE 0x7FFFFFFF
+#define R_00B88C_COMPUTE_WAVE_RESTORE_ADDR_LO 0x00B88C
+#define R_00B890_COMPUTE_WAVE_RESTORE_ADDR_HI 0x00B890
+#define S_00B890_ADDR(x) (((x) & 0xFFFF) << 0)
+#define G_00B890_ADDR(x) (((x) >> 0) & 0xFFFF)
+#define C_00B890_ADDR 0xFFFF0000
+#define R_00B894_COMPUTE_WAVE_RESTORE_CONTROL 0x00B894
+#define S_00B894_ATC(x) (((x) & 0x1) << 0)
+#define G_00B894_ATC(x) (((x) >> 0) & 0x1)
+#define C_00B894_ATC 0xFFFFFFFE
+#define S_00B894_MTYPE(x) (((x) & 0x03) << 1)
+#define G_00B894_MTYPE(x) (((x) >> 1) & 0x03)
+#define C_00B894_MTYPE 0xFFFFFFF9
+/* */
+/* */
#define R_00B900_COMPUTE_USER_DATA_0 0x00B900
+#define R_00B904_COMPUTE_USER_DATA_1 0x00B904
+#define R_00B908_COMPUTE_USER_DATA_2 0x00B908
+#define R_00B90C_COMPUTE_USER_DATA_3 0x00B90C
+#define R_00B910_COMPUTE_USER_DATA_4 0x00B910
+#define R_00B914_COMPUTE_USER_DATA_5 0x00B914
+#define R_00B918_COMPUTE_USER_DATA_6 0x00B918
+#define R_00B91C_COMPUTE_USER_DATA_7 0x00B91C
+#define R_00B920_COMPUTE_USER_DATA_8 0x00B920
+#define R_00B924_COMPUTE_USER_DATA_9 0x00B924
+#define R_00B928_COMPUTE_USER_DATA_10 0x00B928
+#define R_00B92C_COMPUTE_USER_DATA_11 0x00B92C
+#define R_00B930_COMPUTE_USER_DATA_12 0x00B930
+#define R_00B934_COMPUTE_USER_DATA_13 0x00B934
+#define R_00B938_COMPUTE_USER_DATA_14 0x00B938
+#define R_00B93C_COMPUTE_USER_DATA_15 0x00B93C
+#define R_00B9FC_COMPUTE_NOWHERE 0x00B9FC
#define R_028000_DB_RENDER_CONTROL 0x028000
#define S_028000_DEPTH_CLEAR_ENABLE(x) (((x) & 0x1) << 0)
#define G_028000_DEPTH_CLEAR_ENABLE(x) (((x) >> 0) & 0x1)
@@ -5196,6 +6876,11 @@
#define S_028000_COPY_SAMPLE(x) (((x) & 0x0F) << 8)
#define G_028000_COPY_SAMPLE(x) (((x) >> 8) & 0x0F)
#define C_028000_COPY_SAMPLE 0xFFFFF0FF
+/* VI */
+#define S_028000_DECOMPRESS_ENABLE(x) (((x) & 0x1) << 12)
+#define G_028000_DECOMPRESS_ENABLE(x) (((x) >> 12) & 0x1)
+#define C_028000_DECOMPRESS_ENABLE 0xFFFFEFFF
+/* */
#define R_028004_DB_COUNT_CONTROL 0x028004
#define S_028004_ZPASS_INCREMENT_DISABLE(x) (((x) & 0x1) << 0)
#define G_028004_ZPASS_INCREMENT_DISABLE(x) (((x) >> 0) & 0x1)
@@ -5474,9 +7159,6 @@
#define S_028040_NUM_SAMPLES(x) (((x) & 0x03) << 2)
#define G_028040_NUM_SAMPLES(x) (((x) >> 2) & 0x03)
#define C_028040_NUM_SAMPLES 0xFFFFFFF3
-#define S_028040_TILE_MODE_INDEX(x) (((x) & 0x07) << 20) /* not on CIK */
-#define G_028040_TILE_MODE_INDEX(x) (((x) >> 20) & 0x07) /* not on CIK */
-#define C_028040_TILE_MODE_INDEX 0xFF8FFFFF /* not on CIK */
/* CIK */
#define S_028040_TILE_SPLIT(x) (((x) & 0x07) << 13)
#define G_028040_TILE_SPLIT(x) (((x) >> 13) & 0x07)
@@ -5489,6 +7171,14 @@
#define V_028040_ADDR_SURF_TILE_SPLIT_2KB 0x05
#define V_028040_ADDR_SURF_TILE_SPLIT_4KB 0x06
/* */
+#define S_028040_TILE_MODE_INDEX(x) (((x) & 0x07) << 20) /* not on CIK */
+#define G_028040_TILE_MODE_INDEX(x) (((x) >> 20) & 0x07) /* not on CIK */
+#define C_028040_TILE_MODE_INDEX 0xFF8FFFFF /* not on CIK */
+/* VI */
+#define S_028040_DECOMPRESS_ON_N_ZPLANES(x) (((x) & 0x0F) << 23)
+#define G_028040_DECOMPRESS_ON_N_ZPLANES(x) (((x) >> 23) & 0x0F)
+#define C_028040_DECOMPRESS_ON_N_ZPLANES 0xF87FFFFF
+/* */
#define S_028040_ALLOW_EXPCLEAR(x) (((x) & 0x1) << 27)
#define G_028040_ALLOW_EXPCLEAR(x) (((x) >> 27) & 0x1)
#define C_028040_ALLOW_EXPCLEAR 0xF7FFFFFF
@@ -5498,6 +7188,11 @@
#define S_028040_TILE_SURFACE_ENABLE(x) (((x) & 0x1) << 29)
#define G_028040_TILE_SURFACE_ENABLE(x) (((x) >> 29) & 0x1)
#define C_028040_TILE_SURFACE_ENABLE 0xDFFFFFFF
+/* VI */
+#define S_028040_CLEAR_DISALLOWED(x) (((x) & 0x1) << 30)
+#define G_028040_CLEAR_DISALLOWED(x) (((x) >> 30) & 0x1)
+#define C_028040_CLEAR_DISALLOWED 0xBFFFFFFF
+/* */
#define S_028040_ZRANGE_PRECISION(x) (((x) & 0x1) << 31)
#define G_028040_ZRANGE_PRECISION(x) (((x) >> 31) & 0x1)
#define C_028040_ZRANGE_PRECISION 0x7FFFFFFF
@@ -5507,9 +7202,6 @@
#define C_028044_FORMAT 0xFFFFFFFE
#define V_028044_STENCIL_INVALID 0x00
#define V_028044_STENCIL_8 0x01
-#define S_028044_TILE_MODE_INDEX(x) (((x) & 0x07) << 20) /* not on CIK */
-#define G_028044_TILE_MODE_INDEX(x) (((x) >> 20) & 0x07) /* not on CIK */
-#define C_028044_TILE_MODE_INDEX 0xFF8FFFFF /* not on CIK */
/* CIK */
#define S_028044_TILE_SPLIT(x) (((x) & 0x07) << 13)
#define G_028044_TILE_SPLIT(x) (((x) >> 13) & 0x07)
@@ -5522,12 +7214,20 @@
#define V_028044_ADDR_SURF_TILE_SPLIT_2KB 0x05
#define V_028044_ADDR_SURF_TILE_SPLIT_4KB 0x06
/* */
+#define S_028044_TILE_MODE_INDEX(x) (((x) & 0x07) << 20) /* not on CIK */
+#define G_028044_TILE_MODE_INDEX(x) (((x) >> 20) & 0x07) /* not on CIK */
+#define C_028044_TILE_MODE_INDEX 0xFF8FFFFF /* not on CIK */
#define S_028044_ALLOW_EXPCLEAR(x) (((x) & 0x1) << 27)
#define G_028044_ALLOW_EXPCLEAR(x) (((x) >> 27) & 0x1)
#define C_028044_ALLOW_EXPCLEAR 0xF7FFFFFF
#define S_028044_TILE_STENCIL_DISABLE(x) (((x) & 0x1) << 29)
#define G_028044_TILE_STENCIL_DISABLE(x) (((x) >> 29) & 0x1)
#define C_028044_TILE_STENCIL_DISABLE 0xDFFFFFFF
+/* VI */
+#define S_028044_CLEAR_DISALLOWED(x) (((x) & 0x1) << 30)
+#define G_028044_CLEAR_DISALLOWED(x) (((x) >> 30) & 0x1)
+#define C_028044_CLEAR_DISALLOWED 0xBFFFFFFF
+/* */
#define R_028048_DB_Z_READ_BASE 0x028048
#define R_02804C_DB_STENCIL_READ_BASE 0x02804C
#define R_028050_DB_Z_WRITE_BASE 0x028050
@@ -5549,7 +7249,13 @@
#define S_028084_ADDRESS(x) (((x) & 0xFF) << 0)
#define G_028084_ADDRESS(x) (((x) >> 0) & 0xFF)
#define C_028084_ADDRESS 0xFFFFFF00
-/* */
+#define R_0281E8_COHER_DEST_BASE_HI_0 0x0281E8
+#define R_0281EC_COHER_DEST_BASE_HI_1 0x0281EC
+#define R_0281F0_COHER_DEST_BASE_HI_2 0x0281F0
+#define R_0281F4_COHER_DEST_BASE_HI_3 0x0281F4
+/* */
+#define R_0281F8_COHER_DEST_BASE_2 0x0281F8
+#define R_0281FC_COHER_DEST_BASE_3 0x0281FC
#define R_028200_PA_SC_WINDOW_OFFSET 0x028200
#define S_028200_WINDOW_X_OFFSET(x) (((x) & 0xFFFF) << 0)
#define G_028200_WINDOW_X_OFFSET(x) (((x) >> 0) & 0xFFFF)
@@ -5694,6 +7400,8 @@
#define S_028244_BR_Y(x) (((x) & 0x7FFF) << 16)
#define G_028244_BR_Y(x) (((x) >> 16) & 0x7FFF)
#define C_028244_BR_Y 0x8000FFFF
+#define R_028248_COHER_DEST_BASE_0 0x028248
+#define R_02824C_COHER_DEST_BASE_1 0x02824C
#define R_028250_PA_SC_VPORT_SCISSOR_0_TL 0x028250
#define S_028250_TL_X(x) (((x) & 0x7FFF) << 0)
#define G_028250_TL_X(x) (((x) >> 0) & 0x7FFF)
@@ -5711,8 +7419,68 @@
#define S_028254_BR_Y(x) (((x) & 0x7FFF) << 16)
#define G_028254_BR_Y(x) (((x) >> 16) & 0x7FFF)
#define C_028254_BR_Y 0x8000FFFF
+#define R_028258_PA_SC_VPORT_SCISSOR_1_TL 0x028258
+#define R_02825C_PA_SC_VPORT_SCISSOR_1_BR 0x02825C
+#define R_028260_PA_SC_VPORT_SCISSOR_2_TL 0x028260
+#define R_028264_PA_SC_VPORT_SCISSOR_2_BR 0x028264
+#define R_028268_PA_SC_VPORT_SCISSOR_3_TL 0x028268
+#define R_02826C_PA_SC_VPORT_SCISSOR_3_BR 0x02826C
+#define R_028270_PA_SC_VPORT_SCISSOR_4_TL 0x028270
+#define R_028274_PA_SC_VPORT_SCISSOR_4_BR 0x028274
+#define R_028278_PA_SC_VPORT_SCISSOR_5_TL 0x028278
+#define R_02827C_PA_SC_VPORT_SCISSOR_5_BR 0x02827C
+#define R_028280_PA_SC_VPORT_SCISSOR_6_TL 0x028280
+#define R_028284_PA_SC_VPORT_SCISSOR_6_BR 0x028284
+#define R_028288_PA_SC_VPORT_SCISSOR_7_TL 0x028288
+#define R_02828C_PA_SC_VPORT_SCISSOR_7_BR 0x02828C
+#define R_028290_PA_SC_VPORT_SCISSOR_8_TL 0x028290
+#define R_028294_PA_SC_VPORT_SCISSOR_8_BR 0x028294
+#define R_028298_PA_SC_VPORT_SCISSOR_9_TL 0x028298
+#define R_02829C_PA_SC_VPORT_SCISSOR_9_BR 0x02829C
+#define R_0282A0_PA_SC_VPORT_SCISSOR_10_TL 0x0282A0
+#define R_0282A4_PA_SC_VPORT_SCISSOR_10_BR 0x0282A4
+#define R_0282A8_PA_SC_VPORT_SCISSOR_11_TL 0x0282A8
+#define R_0282AC_PA_SC_VPORT_SCISSOR_11_BR 0x0282AC
+#define R_0282B0_PA_SC_VPORT_SCISSOR_12_TL 0x0282B0
+#define R_0282B4_PA_SC_VPORT_SCISSOR_12_BR 0x0282B4
+#define R_0282B8_PA_SC_VPORT_SCISSOR_13_TL 0x0282B8
+#define R_0282BC_PA_SC_VPORT_SCISSOR_13_BR 0x0282BC
+#define R_0282C0_PA_SC_VPORT_SCISSOR_14_TL 0x0282C0
+#define R_0282C4_PA_SC_VPORT_SCISSOR_14_BR 0x0282C4
+#define R_0282C8_PA_SC_VPORT_SCISSOR_15_TL 0x0282C8
+#define R_0282CC_PA_SC_VPORT_SCISSOR_15_BR 0x0282CC
#define R_0282D0_PA_SC_VPORT_ZMIN_0 0x0282D0
#define R_0282D4_PA_SC_VPORT_ZMAX_0 0x0282D4
+#define R_0282D8_PA_SC_VPORT_ZMIN_1 0x0282D8
+#define R_0282DC_PA_SC_VPORT_ZMAX_1 0x0282DC
+#define R_0282E0_PA_SC_VPORT_ZMIN_2 0x0282E0
+#define R_0282E4_PA_SC_VPORT_ZMAX_2 0x0282E4
+#define R_0282E8_PA_SC_VPORT_ZMIN_3 0x0282E8
+#define R_0282EC_PA_SC_VPORT_ZMAX_3 0x0282EC
+#define R_0282F0_PA_SC_VPORT_ZMIN_4 0x0282F0
+#define R_0282F4_PA_SC_VPORT_ZMAX_4 0x0282F4
+#define R_0282F8_PA_SC_VPORT_ZMIN_5 0x0282F8
+#define R_0282FC_PA_SC_VPORT_ZMAX_5 0x0282FC
+#define R_028300_PA_SC_VPORT_ZMIN_6 0x028300
+#define R_028304_PA_SC_VPORT_ZMAX_6 0x028304
+#define R_028308_PA_SC_VPORT_ZMIN_7 0x028308
+#define R_02830C_PA_SC_VPORT_ZMAX_7 0x02830C
+#define R_028310_PA_SC_VPORT_ZMIN_8 0x028310
+#define R_028314_PA_SC_VPORT_ZMAX_8 0x028314
+#define R_028318_PA_SC_VPORT_ZMIN_9 0x028318
+#define R_02831C_PA_SC_VPORT_ZMAX_9 0x02831C
+#define R_028320_PA_SC_VPORT_ZMIN_10 0x028320
+#define R_028324_PA_SC_VPORT_ZMAX_10 0x028324
+#define R_028328_PA_SC_VPORT_ZMIN_11 0x028328
+#define R_02832C_PA_SC_VPORT_ZMAX_11 0x02832C
+#define R_028330_PA_SC_VPORT_ZMIN_12 0x028330
+#define R_028334_PA_SC_VPORT_ZMAX_12 0x028334
+#define R_028338_PA_SC_VPORT_ZMIN_13 0x028338
+#define R_02833C_PA_SC_VPORT_ZMAX_13 0x02833C
+#define R_028340_PA_SC_VPORT_ZMIN_14 0x028340
+#define R_028344_PA_SC_VPORT_ZMAX_14 0x028344
+#define R_028348_PA_SC_VPORT_ZMIN_15 0x028348
+#define R_02834C_PA_SC_VPORT_ZMAX_15 0x02834C
#define R_028350_PA_SC_RASTER_CONFIG 0x028350
#define S_028350_RB_MAP_PKR0(x) (((x) & 0x03) << 0)
#define G_028350_RB_MAP_PKR0(x) (((x) >> 0) & 0x03)
@@ -5834,6 +7602,13 @@
#define V_028354_RASTER_CONFIG_SE_PAIR_YSEL_16_WIDE_TILE 0x01
#define V_028354_RASTER_CONFIG_SE_PAIR_YSEL_32_WIDE_TILE 0x02
#define V_028354_RASTER_CONFIG_SE_PAIR_YSEL_64_WIDE_TILE 0x03
+#define R_028358_PA_SC_SCREEN_EXTENT_CONTROL 0x028358
+#define S_028358_SLICE_EVEN_ENABLE(x) (((x) & 0x03) << 0)
+#define G_028358_SLICE_EVEN_ENABLE(x) (((x) >> 0) & 0x03)
+#define C_028358_SLICE_EVEN_ENABLE 0xFFFFFFFC
+#define S_028358_SLICE_ODD_ENABLE(x) (((x) & 0x03) << 2)
+#define G_028358_SLICE_ODD_ENABLE(x) (((x) >> 2) & 0x03)
+#define C_028358_SLICE_ODD_ENABLE 0xFFFFFFF3
/* */
#define R_028400_VGT_MAX_VTX_INDX 0x028400
#define R_028404_VGT_MIN_VTX_INDX 0x028404
@@ -5843,6 +7618,18 @@
#define R_028418_CB_BLEND_GREEN 0x028418
#define R_02841C_CB_BLEND_BLUE 0x02841C
#define R_028420_CB_BLEND_ALPHA 0x028420
+/* VI */
+#define R_028424_CB_DCC_CONTROL 0x028424
+#define S_028424_OVERWRITE_COMBINER_DISABLE(x) (((x) & 0x1) << 0)
+#define G_028424_OVERWRITE_COMBINER_DISABLE(x) (((x) >> 0) & 0x1)
+#define C_028424_OVERWRITE_COMBINER_DISABLE 0xFFFFFFFE
+#define S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(x) (((x) & 0x1) << 1)
+#define G_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(x) (((x) >> 1) & 0x1)
+#define C_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE 0xFFFFFFFD
+#define S_028424_OVERWRITE_COMBINER_WATERMARK(x) (((x) & 0x1F) << 2)
+#define G_028424_OVERWRITE_COMBINER_WATERMARK(x) (((x) >> 2) & 0x1F)
+#define C_028424_OVERWRITE_COMBINER_WATERMARK 0xFFFFFF83
+/* */
#define R_02842C_DB_STENCIL_CONTROL 0x02842C
#define S_02842C_STENCILFAIL(x) (((x) & 0x0F) << 0)
#define G_02842C_STENCILFAIL(x) (((x) >> 0) & 0x0F)
@@ -5984,12 +7771,102 @@
#define S_028434_STENCILOPVAL_BF(x) (((x) & 0xFF) << 24)
#define G_028434_STENCILOPVAL_BF(x) (((x) >> 24) & 0xFF)
#define C_028434_STENCILOPVAL_BF 0x00FFFFFF
-#define R_02843C_PA_CL_VPORT_XSCALE_0 0x02843C
-#define R_028440_PA_CL_VPORT_XOFFSET_0 0x028440
-#define R_028444_PA_CL_VPORT_YSCALE_0 0x028444
-#define R_028448_PA_CL_VPORT_YOFFSET_0 0x028448
-#define R_02844C_PA_CL_VPORT_ZSCALE_0 0x02844C
-#define R_028450_PA_CL_VPORT_ZOFFSET_0 0x028450
+#define R_02843C_PA_CL_VPORT_XSCALE 0x02843C
+#define R_028440_PA_CL_VPORT_XOFFSET 0x028440
+#define R_028444_PA_CL_VPORT_YSCALE 0x028444
+#define R_028448_PA_CL_VPORT_YOFFSET 0x028448
+#define R_02844C_PA_CL_VPORT_ZSCALE 0x02844C
+#define R_028450_PA_CL_VPORT_ZOFFSET 0x028450
+#define R_028454_PA_CL_VPORT_XSCALE_1 0x028454
+#define R_028458_PA_CL_VPORT_XOFFSET_1 0x028458
+#define R_02845C_PA_CL_VPORT_YSCALE_1 0x02845C
+#define R_028460_PA_CL_VPORT_YOFFSET_1 0x028460
+#define R_028464_PA_CL_VPORT_ZSCALE_1 0x028464
+#define R_028468_PA_CL_VPORT_ZOFFSET_1 0x028468
+#define R_02846C_PA_CL_VPORT_XSCALE_2 0x02846C
+#define R_028470_PA_CL_VPORT_XOFFSET_2 0x028470
+#define R_028474_PA_CL_VPORT_YSCALE_2 0x028474
+#define R_028478_PA_CL_VPORT_YOFFSET_2 0x028478
+#define R_02847C_PA_CL_VPORT_ZSCALE_2 0x02847C
+#define R_028480_PA_CL_VPORT_ZOFFSET_2 0x028480
+#define R_028484_PA_CL_VPORT_XSCALE_3 0x028484
+#define R_028488_PA_CL_VPORT_XOFFSET_3 0x028488
+#define R_02848C_PA_CL_VPORT_YSCALE_3 0x02848C
+#define R_028490_PA_CL_VPORT_YOFFSET_3 0x028490
+#define R_028494_PA_CL_VPORT_ZSCALE_3 0x028494
+#define R_028498_PA_CL_VPORT_ZOFFSET_3 0x028498
+#define R_02849C_PA_CL_VPORT_XSCALE_4 0x02849C
+#define R_0284A0_PA_CL_VPORT_XOFFSET_4 0x0284A0
+#define R_0284A4_PA_CL_VPORT_YSCALE_4 0x0284A4
+#define R_0284A8_PA_CL_VPORT_YOFFSET_4 0x0284A8
+#define R_0284AC_PA_CL_VPORT_ZSCALE_4 0x0284AC
+#define R_0284B0_PA_CL_VPORT_ZOFFSET_4 0x0284B0
+#define R_0284B4_PA_CL_VPORT_XSCALE_5 0x0284B4
+#define R_0284B8_PA_CL_VPORT_XOFFSET_5 0x0284B8
+#define R_0284BC_PA_CL_VPORT_YSCALE_5 0x0284BC
+#define R_0284C0_PA_CL_VPORT_YOFFSET_5 0x0284C0
+#define R_0284C4_PA_CL_VPORT_ZSCALE_5 0x0284C4
+#define R_0284C8_PA_CL_VPORT_ZOFFSET_5 0x0284C8
+#define R_0284CC_PA_CL_VPORT_XSCALE_6 0x0284CC
+#define R_0284D0_PA_CL_VPORT_XOFFSET_6 0x0284D0
+#define R_0284D4_PA_CL_VPORT_YSCALE_6 0x0284D4
+#define R_0284D8_PA_CL_VPORT_YOFFSET_6 0x0284D8
+#define R_0284DC_PA_CL_VPORT_ZSCALE_6 0x0284DC
+#define R_0284E0_PA_CL_VPORT_ZOFFSET_6 0x0284E0
+#define R_0284E4_PA_CL_VPORT_XSCALE_7 0x0284E4
+#define R_0284E8_PA_CL_VPORT_XOFFSET_7 0x0284E8
+#define R_0284EC_PA_CL_VPORT_YSCALE_7 0x0284EC
+#define R_0284F0_PA_CL_VPORT_YOFFSET_7 0x0284F0
+#define R_0284F4_PA_CL_VPORT_ZSCALE_7 0x0284F4
+#define R_0284F8_PA_CL_VPORT_ZOFFSET_7 0x0284F8
+#define R_0284FC_PA_CL_VPORT_XSCALE_8 0x0284FC
+#define R_028500_PA_CL_VPORT_XOFFSET_8 0x028500
+#define R_028504_PA_CL_VPORT_YSCALE_8 0x028504
+#define R_028508_PA_CL_VPORT_YOFFSET_8 0x028508
+#define R_02850C_PA_CL_VPORT_ZSCALE_8 0x02850C
+#define R_028510_PA_CL_VPORT_ZOFFSET_8 0x028510
+#define R_028514_PA_CL_VPORT_XSCALE_9 0x028514
+#define R_028518_PA_CL_VPORT_XOFFSET_9 0x028518
+#define R_02851C_PA_CL_VPORT_YSCALE_9 0x02851C
+#define R_028520_PA_CL_VPORT_YOFFSET_9 0x028520
+#define R_028524_PA_CL_VPORT_ZSCALE_9 0x028524
+#define R_028528_PA_CL_VPORT_ZOFFSET_9 0x028528
+#define R_02852C_PA_CL_VPORT_XSCALE_10 0x02852C
+#define R_028530_PA_CL_VPORT_XOFFSET_10 0x028530
+#define R_028534_PA_CL_VPORT_YSCALE_10 0x028534
+#define R_028538_PA_CL_VPORT_YOFFSET_10 0x028538
+#define R_02853C_PA_CL_VPORT_ZSCALE_10 0x02853C
+#define R_028540_PA_CL_VPORT_ZOFFSET_10 0x028540
+#define R_028544_PA_CL_VPORT_XSCALE_11 0x028544
+#define R_028548_PA_CL_VPORT_XOFFSET_11 0x028548
+#define R_02854C_PA_CL_VPORT_YSCALE_11 0x02854C
+#define R_028550_PA_CL_VPORT_YOFFSET_11 0x028550
+#define R_028554_PA_CL_VPORT_ZSCALE_11 0x028554
+#define R_028558_PA_CL_VPORT_ZOFFSET_11 0x028558
+#define R_02855C_PA_CL_VPORT_XSCALE_12 0x02855C
+#define R_028560_PA_CL_VPORT_XOFFSET_12 0x028560
+#define R_028564_PA_CL_VPORT_YSCALE_12 0x028564
+#define R_028568_PA_CL_VPORT_YOFFSET_12 0x028568
+#define R_02856C_PA_CL_VPORT_ZSCALE_12 0x02856C
+#define R_028570_PA_CL_VPORT_ZOFFSET_12 0x028570
+#define R_028574_PA_CL_VPORT_XSCALE_13 0x028574
+#define R_028578_PA_CL_VPORT_XOFFSET_13 0x028578
+#define R_02857C_PA_CL_VPORT_YSCALE_13 0x02857C
+#define R_028580_PA_CL_VPORT_YOFFSET_13 0x028580
+#define R_028584_PA_CL_VPORT_ZSCALE_13 0x028584
+#define R_028588_PA_CL_VPORT_ZOFFSET_13 0x028588
+#define R_02858C_PA_CL_VPORT_XSCALE_14 0x02858C
+#define R_028590_PA_CL_VPORT_XOFFSET_14 0x028590
+#define R_028594_PA_CL_VPORT_YSCALE_14 0x028594
+#define R_028598_PA_CL_VPORT_YOFFSET_14 0x028598
+#define R_02859C_PA_CL_VPORT_ZSCALE_14 0x02859C
+#define R_0285A0_PA_CL_VPORT_ZOFFSET_14 0x0285A0
+#define R_0285A4_PA_CL_VPORT_XSCALE_15 0x0285A4
+#define R_0285A8_PA_CL_VPORT_XOFFSET_15 0x0285A8
+#define R_0285AC_PA_CL_VPORT_YSCALE_15 0x0285AC
+#define R_0285B0_PA_CL_VPORT_YOFFSET_15 0x0285B0
+#define R_0285B4_PA_CL_VPORT_ZSCALE_15 0x0285B4
+#define R_0285B8_PA_CL_VPORT_ZOFFSET_15 0x0285B8
#define R_0285BC_PA_CL_UCP_0_X 0x0285BC
#define R_0285C0_PA_CL_UCP_0_Y 0x0285C0
#define R_0285C4_PA_CL_UCP_0_Z 0x0285C4
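The viewport block added above is likewise pure arithmetic: six 32-bit registers per viewport (XSCALE, XOFFSET, YSCALE, YOFFSET, ZSCALE, ZOFFSET), sixteen viewports at a 0x18-byte stride. A sketch of indexed addressing (the macro name is illustrative):

   /* component 0..5 maps to XSCALE..ZOFFSET */
   #define PA_CL_VPORT_REG(vp, comp) (0x02843C + 0x18 * (vp) + 4 * (comp))
   /* e.g. PA_CL_VPORT_REG(1, 0) == 0x028454, i.e. PA_CL_VPORT_XSCALE_1 */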
@@ -6036,6 +7913,26 @@
#define G_028644_DUP(x) (((x) >> 18) & 0x1)
#define C_028644_DUP 0xFFFBFFFF
/* */
+/* VI */
+#define S_028644_FP16_INTERP_MODE(x) (((x) & 0x1) << 19)
+#define G_028644_FP16_INTERP_MODE(x) (((x) >> 19) & 0x1)
+#define C_028644_FP16_INTERP_MODE 0xFFF7FFFF
+#define S_028644_USE_DEFAULT_ATTR1(x) (((x) & 0x1) << 20)
+#define G_028644_USE_DEFAULT_ATTR1(x) (((x) >> 20) & 0x1)
+#define C_028644_USE_DEFAULT_ATTR1 0xFFEFFFFF
+#define S_028644_DEFAULT_VAL_ATTR1(x) (((x) & 0x03) << 21)
+#define G_028644_DEFAULT_VAL_ATTR1(x) (((x) >> 21) & 0x03)
+#define C_028644_DEFAULT_VAL_ATTR1 0xFF9FFFFF
+#define S_028644_PT_SPRITE_TEX_ATTR1(x) (((x) & 0x1) << 23)
+#define G_028644_PT_SPRITE_TEX_ATTR1(x) (((x) >> 23) & 0x1)
+#define C_028644_PT_SPRITE_TEX_ATTR1 0xFF7FFFFF
+#define S_028644_ATTR0_VALID(x) (((x) & 0x1) << 24)
+#define G_028644_ATTR0_VALID(x) (((x) >> 24) & 0x1)
+#define C_028644_ATTR0_VALID 0xFEFFFFFF
+#define S_028644_ATTR1_VALID(x) (((x) & 0x1) << 25)
+#define G_028644_ATTR1_VALID(x) (((x) >> 25) & 0x1)
+#define C_028644_ATTR1_VALID 0xFDFFFFFF
+/* */
#define R_028648_SPI_PS_INPUT_CNTL_1 0x028648
#define R_02864C_SPI_PS_INPUT_CNTL_2 0x02864C
#define R_028650_SPI_PS_INPUT_CNTL_3 0x028650
@@ -6559,6 +8456,10 @@
#define R_028794_CB_BLEND5_CONTROL 0x028794
#define R_028798_CB_BLEND6_CONTROL 0x028798
#define R_02879C_CB_BLEND7_CONTROL 0x02879C
+#define R_0287CC_CS_COPY_STATE 0x0287CC
+#define S_0287CC_SRC_STATE_ID(x) (((x) & 0x07) << 0)
+#define G_0287CC_SRC_STATE_ID(x) (((x) >> 0) & 0x07)
+#define C_0287CC_SRC_STATE_ID 0xFFFFFFF8
#define R_0287D4_PA_CL_POINT_X_RAD 0x0287D4
#define R_0287D8_PA_CL_POINT_Y_RAD 0x0287D8
#define R_0287DC_PA_CL_POINT_SIZE 0x0287DC
@@ -6588,6 +8489,10 @@
#define G_0287F0_USE_OPAQUE(x) (((x) >> 6) & 0x1)
#define C_0287F0_USE_OPAQUE 0xFFFFFFBF
#define R_0287F4_VGT_IMMED_DATA 0x0287F4 /* not on CIK */
+#define R_0287F8_VGT_EVENT_ADDRESS_REG 0x0287F8
+#define S_0287F8_ADDRESS_LOW(x) (((x) & 0xFFFFFFF) << 0)
+#define G_0287F8_ADDRESS_LOW(x) (((x) >> 0) & 0xFFFFFFF)
+#define C_0287F8_ADDRESS_LOW 0xF0000000
#define R_028800_DB_DEPTH_CONTROL 0x028800
#define S_028800_STENCIL_ENABLE(x) (((x) & 0x1) << 0)
#define G_028800_STENCIL_ENABLE(x) (((x) >> 0) & 0x1)
@@ -6644,36 +8549,42 @@
#define G_028800_DISABLE_COLOR_WRITES_ON_DEPTH_PASS(x) (((x) >> 31) & 0x1)
#define C_028800_DISABLE_COLOR_WRITES_ON_DEPTH_PASS 0x7FFFFFFF
#define R_028804_DB_EQAA 0x028804
-#define S_028804_MAX_ANCHOR_SAMPLES(x) (((x) & 0x7) << 0)
-#define G_028804_MAX_ANCHOR_SAMPLES(x) (((x) >> 0) & 0x7)
-#define C_028804_MAX_ANCHOR_SAMPLES (~(((~0) & 0x7) << 0))
-#define S_028804_PS_ITER_SAMPLES(x) (((x) & 0x7) << 4)
-#define G_028804_PS_ITER_SAMPLES(x) (((x) >> 4) & 0x7)
-#define C_028804_PS_ITER_SAMPLES (~(((~0) & 0x7) << 4))
-#define S_028804_MASK_EXPORT_NUM_SAMPLES(x) (((x) & 0x7) << 8)
-#define G_028804_MASK_EXPORT_NUM_SAMPLES(x) (((x) >> 8) & 0x7)
-#define C_028804_MASK_EXPORT_NUM_SAMPLES (~(((~0) & 0x7) << 8))
-#define S_028804_ALPHA_TO_MASK_NUM_SAMPLES(x) (((x) & 0x7) << 12)
-#define G_028804_ALPHA_TO_MASK_NUM_SAMPLES(x) (((x) >> 12) & 0x7)
-#define C_028804_ALPHA_TO_MASK_NUM_SAMPLES (~(((~0) & 0x7) << 12))
-#define S_028804_HIGH_QUALITY_INTERSECTIONS(x) (((x) & 0x1) << 16)
-#define G_028804_HIGH_QUALITY_INTERSECTIONS(x) (((x) >> 16) & 0x1)
-#define C_028804_HIGH_QUALITY_INTERSECTIONS (~(((~0) & 0x1) << 16))
-#define S_028804_INCOHERENT_EQAA_READS(x) (((x) & 0x1) << 17)
-#define G_028804_INCOHERENT_EQAA_READS(x) (((x) >> 17) & 0x1)
-#define C_028804_INCOHERENT_EQAA_READS (~(((~0) & 0x1) << 17))
-#define S_028804_INTERPOLATE_COMP_Z(x) (((x) & 0x1) << 18)
-#define G_028804_INTERPOLATE_COMP_Z(x) (((x) >> 18) & 0x1)
-#define C_028804_INTERPOLATE_COMP_Z (~(((~0) >> 18) & 0x1))
-#define S_028804_INTERPOLATE_SRC_Z(x) (((x) & 0x1) << 19)
-#define G_028804_INTERPOLATE_SRC_Z(x) (((x) >> 19) & 0x1)
-#define C_028804_INTERPOLATE_SRC_Z (~(((~0) & 0x1) << 19))
-#define S_028804_STATIC_ANCHOR_ASSOCIATIONS(x) (((x) & 0x1) << 20)
-#define G_028804_STATIC_ANCHOR_ASSOCIATIONS(x) (((x) >> 20) & 0x1)
-#define C_028804_STATIC_ANCHOR_ASSOCIATIONS (~(((~0) & 0x1) << 20))
-#define S_028804_ALPHA_TO_MASK_EQAA_DISABLE(x) (((x) & 0x1) << 21)
-#define G_028804_ALPHA_TO_MASK_EQAA_DISABLE(x) (((x) >> 21) & 0x1)
-#define C_028804_ALPHA_TO_MASK_EQAA_DISABLE (~(((~0) & 0x1) << 21))
+#define S_028804_MAX_ANCHOR_SAMPLES(x) (((x) & 0x7) << 0)
+#define G_028804_MAX_ANCHOR_SAMPLES(x) (((x) >> 0) & 0x07)
+#define C_028804_MAX_ANCHOR_SAMPLES 0xFFFFFFF8
+#define S_028804_PS_ITER_SAMPLES(x) (((x) & 0x7) << 4)
+#define G_028804_PS_ITER_SAMPLES(x) (((x) >> 4) & 0x07)
+#define C_028804_PS_ITER_SAMPLES 0xFFFFFF8F
+#define S_028804_MASK_EXPORT_NUM_SAMPLES(x) (((x) & 0x7) << 8)
+#define G_028804_MASK_EXPORT_NUM_SAMPLES(x) (((x) >> 8) & 0x07)
+#define C_028804_MASK_EXPORT_NUM_SAMPLES 0xFFFFF8FF
+#define S_028804_ALPHA_TO_MASK_NUM_SAMPLES(x) (((x) & 0x7) << 12)
+#define G_028804_ALPHA_TO_MASK_NUM_SAMPLES(x) (((x) >> 12) & 0x07)
+#define C_028804_ALPHA_TO_MASK_NUM_SAMPLES 0xFFFF8FFF
+#define S_028804_HIGH_QUALITY_INTERSECTIONS(x) (((x) & 0x1) << 16)
+#define G_028804_HIGH_QUALITY_INTERSECTIONS(x) (((x) >> 16) & 0x1)
+#define C_028804_HIGH_QUALITY_INTERSECTIONS 0xFFFEFFFF
+#define S_028804_INCOHERENT_EQAA_READS(x) (((x) & 0x1) << 17)
+#define G_028804_INCOHERENT_EQAA_READS(x) (((x) >> 17) & 0x1)
+#define C_028804_INCOHERENT_EQAA_READS 0xFFFDFFFF
+#define S_028804_INTERPOLATE_COMP_Z(x) (((x) & 0x1) << 18)
+#define G_028804_INTERPOLATE_COMP_Z(x) (((x) >> 18) & 0x1)
+#define C_028804_INTERPOLATE_COMP_Z 0xFFFBFFFF
+#define S_028804_INTERPOLATE_SRC_Z(x) (((x) & 0x1) << 19)
+#define G_028804_INTERPOLATE_SRC_Z(x) (((x) >> 19) & 0x1)
+#define C_028804_INTERPOLATE_SRC_Z 0xFFF7FFFF
+#define S_028804_STATIC_ANCHOR_ASSOCIATIONS(x) (((x) & 0x1) << 20)
+#define G_028804_STATIC_ANCHOR_ASSOCIATIONS(x) (((x) >> 20) & 0x1)
+#define C_028804_STATIC_ANCHOR_ASSOCIATIONS 0xFFEFFFFF
+#define S_028804_ALPHA_TO_MASK_EQAA_DISABLE(x) (((x) & 0x1) << 21)
+#define G_028804_ALPHA_TO_MASK_EQAA_DISABLE(x) (((x) >> 21) & 0x1)
+#define C_028804_ALPHA_TO_MASK_EQAA_DISABLE 0xFFDFFFFF
+#define S_028804_OVERRASTERIZATION_AMOUNT(x) (((x) & 0x07) << 24)
+#define G_028804_OVERRASTERIZATION_AMOUNT(x) (((x) >> 24) & 0x07)
+#define C_028804_OVERRASTERIZATION_AMOUNT 0xF8FFFFFF
+#define S_028804_ENABLE_POSTZ_OVERRASTERIZATION(x) (((x) & 0x1) << 27)
+#define G_028804_ENABLE_POSTZ_OVERRASTERIZATION(x) (((x) >> 27) & 0x1)
+#define C_028804_ENABLE_POSTZ_OVERRASTERIZATION 0xF7FFFFFF
#define R_028808_CB_COLOR_CONTROL 0x028808
#define S_028808_DEGAMMA_ENABLE(x) (((x) & 0x1) << 3)
#define G_028808_DEGAMMA_ENABLE(x) (((x) >> 3) & 0x1)
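The DB_EQAA rewrite above replaces the computed complement masks with literal constants. A standalone check (hypothetical, assuming the usual arithmetic right shift for negative ints) confirms the literals are equivalent, and shows why the removed INTERPOLATE_COMP_Z line, which shifted with >> instead of <<, cleared the wrong bit:

   #include <assert.h>
   #include <stdint.h>

   int main(void)
   {
      /* new literal == old computed form, e.g. PS_ITER_SAMPLES: */
      assert(0xFFFFFF8Fu == (uint32_t)(~(((~0) & 0x7) << 4)));
      /* the removed INTERPOLATE_COMP_Z mask cleared bit 0, not bit 18: */
      assert((uint32_t)(~(((~0) >> 18) & 0x1)) == 0xFFFFFFFEu);
      assert((uint32_t)~(0x1u << 18) == 0xFFFBFFFFu);  /* the new literal */
      return 0;
   }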
@@ -6977,6 +8888,11 @@
#define S_02881C_USE_VTX_GS_CUT_FLAG(x) (((x) & 0x1) << 25)
#define G_02881C_USE_VTX_GS_CUT_FLAG(x) (((x) >> 25) & 0x1)
#define C_02881C_USE_VTX_GS_CUT_FLAG 0xFDFFFFFF
+/* VI */
+#define S_02881C_USE_VTX_LINE_WIDTH(x) (((x) & 0x1) << 26)
+#define G_02881C_USE_VTX_LINE_WIDTH(x) (((x) >> 26) & 0x1)
+#define C_02881C_USE_VTX_LINE_WIDTH 0xFBFFFFFF
+/* */
#define R_028820_PA_CL_NANINF_CNTL 0x028820
#define S_028820_VTE_XY_INF_DISCARD(x) (((x) & 0x1) << 0)
#define G_028820_VTE_XY_INF_DISCARD(x) (((x) >> 0) & 0x1)
@@ -7447,9 +9363,21 @@
#define S_028A4C_PS_ITER_SAMPLE(x) (((x) & 0x1) << 16)
#define G_028A4C_PS_ITER_SAMPLE(x) (((x) >> 16) & 0x1)
#define C_028A4C_PS_ITER_SAMPLE 0xFFFEFFFF
-#define S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISC(x) (((x) & 0x1) << 17)
-#define G_028A4C_MULTI_SHADER_ENGINE_PRIM_DISC(x) (((x) >> 17) & 0x1)
-#define C_028A4C_MULTI_SHADER_ENGINE_PRIM_DISC 0xFFFDFFFF
+#define S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(x) (((x) & 0x1) << 17)
+#define G_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(x) (((x) >> 17) & 0x1)
+#define C_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE 0xFFFDFFFF
+#define S_028A4C_MULTI_GPU_SUPERTILE_ENABLE(x) (((x) & 0x1) << 18)
+#define G_028A4C_MULTI_GPU_SUPERTILE_ENABLE(x) (((x) >> 18) & 0x1)
+#define C_028A4C_MULTI_GPU_SUPERTILE_ENABLE 0xFFFBFFFF
+#define S_028A4C_GPU_ID_OVERRIDE_ENABLE(x) (((x) & 0x1) << 19)
+#define G_028A4C_GPU_ID_OVERRIDE_ENABLE(x) (((x) >> 19) & 0x1)
+#define C_028A4C_GPU_ID_OVERRIDE_ENABLE 0xFFF7FFFF
+#define S_028A4C_GPU_ID_OVERRIDE(x) (((x) & 0x0F) << 20)
+#define G_028A4C_GPU_ID_OVERRIDE(x) (((x) >> 20) & 0x0F)
+#define C_028A4C_GPU_ID_OVERRIDE 0xFF0FFFFF
+#define S_028A4C_MULTI_GPU_PRIM_DISCARD_ENABLE(x) (((x) & 0x1) << 24)
+#define G_028A4C_MULTI_GPU_PRIM_DISCARD_ENABLE(x) (((x) >> 24) & 0x1)
+#define C_028A4C_MULTI_GPU_PRIM_DISCARD_ENABLE 0xFEFFFFFF
#define S_028A4C_FORCE_EOV_CNTDWN_ENABLE(x) (((x) & 0x1) << 25)
#define G_028A4C_FORCE_EOV_CNTDWN_ENABLE(x) (((x) >> 25) & 0x1)
#define C_028A4C_FORCE_EOV_CNTDWN_ENABLE 0xFDFFFFFF
@@ -7515,6 +9443,7 @@
#define C_028A7C_INDEX_TYPE 0xFFFFFFFC
#define V_028A7C_VGT_INDEX_16 0x00
#define V_028A7C_VGT_INDEX_32 0x01
+#define V_028A7C_VGT_INDEX_8 0x02 /* VI */
#define S_028A7C_SWAP_MODE(x) (((x) & 0x03) << 2)
#define G_028A7C_SWAP_MODE(x) (((x) >> 2) & 0x03)
#define C_028A7C_SWAP_MODE 0xFFFFFFF3
@@ -7544,6 +9473,12 @@
#define G_028A7C_REQ_PATH(x) (((x) >> 10) & 0x1)
#define C_028A7C_REQ_PATH 0xFFFFFBFF
/* */
+/* VI */
+#define S_028A7C_MTYPE(x) (((x) & 0x03) << 11)
+#define G_028A7C_MTYPE(x) (((x) >> 11) & 0x03)
+#define C_028A7C_MTYPE 0xFFFFE7FF
+/* */
+#define R_028A80_WD_ENHANCE 0x028A80
#define R_028A84_VGT_PRIMITIVEID_EN 0x028A84
#define S_028A84_PRIMITIVEID_EN(x) (((x) & 0x1) << 0)
#define G_028A84_PRIMITIVEID_EN(x) (((x) >> 0) & 0x1)
@@ -7642,6 +9577,10 @@
#define S_028AA8_WD_SWITCH_ON_EOP(x) (((x) & 0x1) << 20)
#define G_028AA8_WD_SWITCH_ON_EOP(x) (((x) >> 20) & 0x1)
#define C_028AA8_WD_SWITCH_ON_EOP 0xFFEFFFFF
+/* VI */
+#define S_028AA8_MAX_PRIMGRP_IN_WAVE(x) (((x) & 0x0F) << 28)
+#define G_028AA8_MAX_PRIMGRP_IN_WAVE(x) (((x) >> 28) & 0x0F)
+#define C_028AA8_MAX_PRIMGRP_IN_WAVE 0x0FFFFFFF
/* */
#define R_028AAC_VGT_ESGS_RING_ITEMSIZE 0x028AAC
#define S_028AAC_ITEMSIZE(x) (((x) & 0x7FFF) << 0)
@@ -7681,6 +9620,11 @@
#define S_028ABC_DST_OUTSIDE_ZERO_TO_ONE(x) (((x) & 0x1) << 16)
#define G_028ABC_DST_OUTSIDE_ZERO_TO_ONE(x) (((x) >> 16) & 0x1)
#define C_028ABC_DST_OUTSIDE_ZERO_TO_ONE 0xFFFEFFFF
+/* VI */
+#define S_028ABC_TC_COMPATIBLE(x) (((x) & 0x1) << 17)
+#define G_028ABC_TC_COMPATIBLE(x) (((x) >> 17) & 0x1)
+#define C_028ABC_TC_COMPATIBLE 0xFFFDFFFF
+/* */
#define R_028AC0_DB_SRESULTS_COMPARE_STATE0 0x028AC0
#define S_028AC0_COMPAREFUNC0(x) (((x) & 0x07) << 0)
#define G_028AC0_COMPAREFUNC0(x) (((x) >> 0) & 0x07)
@@ -7770,6 +9714,21 @@
#define S_028B38_MAX_VERT_OUT(x) (((x) & 0x7FF) << 0)
#define G_028B38_MAX_VERT_OUT(x) (((x) >> 0) & 0x7FF)
#define C_028B38_MAX_VERT_OUT 0xFFFFF800
+/* VI */
+#define R_028B50_VGT_TESS_DISTRIBUTION 0x028B50
+#define S_028B50_ACCUM_ISOLINE(x) (((x) & 0xFF) << 0)
+#define G_028B50_ACCUM_ISOLINE(x) (((x) >> 0) & 0xFF)
+#define C_028B50_ACCUM_ISOLINE 0xFFFFFF00
+#define S_028B50_ACCUM_TRI(x) (((x) & 0xFF) << 8)
+#define G_028B50_ACCUM_TRI(x) (((x) >> 8) & 0xFF)
+#define C_028B50_ACCUM_TRI 0xFFFF00FF
+#define S_028B50_ACCUM_QUAD(x) (((x) & 0xFF) << 16)
+#define G_028B50_ACCUM_QUAD(x) (((x) >> 16) & 0xFF)
+#define C_028B50_ACCUM_QUAD 0xFF00FFFF
+#define S_028B50_DONUT_SPLIT(x) (((x) & 0xFF) << 24)
+#define G_028B50_DONUT_SPLIT(x) (((x) >> 24) & 0xFF)
+#define C_028B50_DONUT_SPLIT 0x00FFFFFF
+/* */
#define R_028B54_VGT_SHADER_STAGES_EN 0x028B54
#define S_028B54_LS_EN(x) (((x) & 0x03) << 0)
#define G_028B54_LS_EN(x) (((x) >> 0) & 0x03)
@@ -7798,6 +9757,20 @@
#define S_028B54_DYNAMIC_HS(x) (((x) & 0x1) << 8)
#define G_028B54_DYNAMIC_HS(x) (((x) >> 8) & 0x1)
#define C_028B54_DYNAMIC_HS 0xFFFFFEFF
+/* VI */
+#define S_028B54_DISPATCH_DRAW_EN(x) (((x) & 0x1) << 9)
+#define G_028B54_DISPATCH_DRAW_EN(x) (((x) >> 9) & 0x1)
+#define C_028B54_DISPATCH_DRAW_EN 0xFFFFFDFF
+#define S_028B54_DIS_DEALLOC_ACCUM_0(x) (((x) & 0x1) << 10)
+#define G_028B54_DIS_DEALLOC_ACCUM_0(x) (((x) >> 10) & 0x1)
+#define C_028B54_DIS_DEALLOC_ACCUM_0 0xFFFFFBFF
+#define S_028B54_DIS_DEALLOC_ACCUM_1(x) (((x) & 0x1) << 11)
+#define G_028B54_DIS_DEALLOC_ACCUM_1(x) (((x) >> 11) & 0x1)
+#define C_028B54_DIS_DEALLOC_ACCUM_1 0xFFFFF7FF
+#define S_028B54_VS_WAVE_ID_EN(x) (((x) & 0x1) << 12)
+#define G_028B54_VS_WAVE_ID_EN(x) (((x) >> 12) & 0x1)
+#define C_028B54_VS_WAVE_ID_EN 0xFFFFEFFF
+/* */
#define R_028B58_VGT_LS_HS_CONFIG 0x028B58
#define S_028B58_NUM_PATCHES(x) (((x) & 0xFF) << 0)
#define G_028B58_NUM_PATCHES(x) (((x) >> 0) & 0xFF)
@@ -7848,6 +9821,9 @@
#define S_028B6C_RESERVED_REDUC_AXIS(x) (((x) & 0x1) << 8) /* not on CIK */
#define G_028B6C_RESERVED_REDUC_AXIS(x) (((x) >> 8) & 0x1) /* not on CIK */
#define C_028B6C_RESERVED_REDUC_AXIS 0xFFFFFEFF /* not on CIK */
+#define S_028B6C_DEPRECATED(x) (((x) & 0x1) << 9)
+#define G_028B6C_DEPRECATED(x) (((x) >> 9) & 0x1)
+#define C_028B6C_DEPRECATED 0xFFFFFDFF
#define S_028B6C_NUM_DS_WAVES_PER_SIMD(x) (((x) & 0x0F) << 10)
#define G_028B6C_NUM_DS_WAVES_PER_SIMD(x) (((x) >> 10) & 0x0F)
#define C_028B6C_NUM_DS_WAVES_PER_SIMD 0xFFFFC3FF
@@ -7862,6 +9838,14 @@
#define V_028B6C_VGT_POLICY_STREAM 0x01
#define V_028B6C_VGT_POLICY_BYPASS 0x02
/* */
+/* VI */
+#define S_028B6C_DISTRIBUTION_MODE(x) (((x) & 0x03) << 17)
+#define G_028B6C_DISTRIBUTION_MODE(x) (((x) >> 17) & 0x03)
+#define C_028B6C_DISTRIBUTION_MODE 0xFFF9FFFF
+#define S_028B6C_MTYPE(x) (((x) & 0x03) << 19)
+#define G_028B6C_MTYPE(x) (((x) >> 19) & 0x03)
+#define C_028B6C_MTYPE 0xFFE7FFFF
+/* */
#define R_028B70_DB_ALPHA_TO_MASK 0x028B70
#define S_028B70_ALPHA_TO_MASK_ENABLE(x) (((x) & 0x1) << 0)
#define G_028B70_ALPHA_TO_MASK_ENABLE(x) (((x) >> 0) & 0x1)
@@ -8001,6 +9985,22 @@
#define S_028BDC_DX10_DIAMOND_TEST_ENA(x) (((x) & 0x1) << 12)
#define G_028BDC_DX10_DIAMOND_TEST_ENA(x) (((x) >> 12) & 0x1)
#define C_028BDC_DX10_DIAMOND_TEST_ENA 0xFFFFEFFF
+#define R_028BE0_PA_SC_AA_CONFIG 0x028BE0
+#define S_028BE0_MSAA_NUM_SAMPLES(x) (((x) & 0x7) << 0)
+#define G_028BE0_MSAA_NUM_SAMPLES(x) (((x) >> 0) & 0x07)
+#define C_028BE0_MSAA_NUM_SAMPLES 0xFFFFFFF8
+#define S_028BE0_AA_MASK_CENTROID_DTMN(x) (((x) & 0x1) << 4)
+#define G_028BE0_AA_MASK_CENTROID_DTMN(x) (((x) >> 4) & 0x1)
+#define C_028BE0_AA_MASK_CENTROID_DTMN 0xFFFFFFEF
+#define S_028BE0_MAX_SAMPLE_DIST(x) (((x) & 0xf) << 13)
+#define G_028BE0_MAX_SAMPLE_DIST(x) (((x) >> 13) & 0x0F)
+#define C_028BE0_MAX_SAMPLE_DIST 0xFFFE1FFF
+#define S_028BE0_MSAA_EXPOSED_SAMPLES(x) (((x) & 0x7) << 20)
+#define G_028BE0_MSAA_EXPOSED_SAMPLES(x) (((x) >> 20) & 0x07)
+#define C_028BE0_MSAA_EXPOSED_SAMPLES 0xFF8FFFFF
+#define S_028BE0_DETAIL_TO_EXPOSED_MODE(x) (((x) & 0x3) << 24)
+#define G_028BE0_DETAIL_TO_EXPOSED_MODE(x) (((x) >> 24) & 0x03)
+#define C_028BE0_DETAIL_TO_EXPOSED_MODE 0xFCFFFFFF
#define R_028BE4_PA_SU_VTX_CNTL 0x028BE4
#define S_028BE4_PIX_CENTER(x) (((x) & 0x1) << 0)
#define G_028BE4_PIX_CENTER(x) (((x) >> 0) & 0x1)
@@ -8569,6 +10569,17 @@
#define G_028C70_FMASK_COMPRESSION_DISABLE(x) (((x) >> 26) & 0x1)
#define C_028C70_FMASK_COMPRESSION_DISABLE 0xFBFFFFFF
/* */
+/* VI */
+#define S_028C70_FMASK_COMPRESS_1FRAG_ONLY(x) (((x) & 0x1) << 27)
+#define G_028C70_FMASK_COMPRESS_1FRAG_ONLY(x) (((x) >> 27) & 0x1)
+#define C_028C70_FMASK_COMPRESS_1FRAG_ONLY 0xF7FFFFFF
+#define S_028C70_DCC_ENABLE(x) (((x) & 0x1) << 28)
+#define G_028C70_DCC_ENABLE(x) (((x) >> 28) & 0x1)
+#define C_028C70_DCC_ENABLE 0xEFFFFFFF
+#define S_028C70_CMASK_ADDR_TYPE(x) (((x) & 0x03) << 29)
+#define G_028C70_CMASK_ADDR_TYPE(x) (((x) >> 29) & 0x03)
+#define C_028C70_CMASK_ADDR_TYPE 0x9FFFFFFF
+/* */
#define R_028C74_CB_COLOR0_ATTRIB 0x028C74
#define S_028C74_TILE_MODE_INDEX(x) (((x) & 0x1F) << 0)
#define G_028C74_TILE_MODE_INDEX(x) (((x) >> 0) & 0x1F)
@@ -8576,7 +10587,9 @@
#define S_028C74_FMASK_TILE_MODE_INDEX(x) (((x) & 0x1F) << 5)
#define G_028C74_FMASK_TILE_MODE_INDEX(x) (((x) >> 5) & 0x1F)
#define C_028C74_FMASK_TILE_MODE_INDEX 0xFFFFFC1F
-#define S_028C74_FMASK_BANK_HEIGHT(x) (((x) & 0x3) << 10) /* SI errata */
+#define S_028C74_FMASK_BANK_HEIGHT(x) (((x) & 0x03) << 10)
+#define G_028C74_FMASK_BANK_HEIGHT(x) (((x) >> 10) & 0x03)
+#define C_028C74_FMASK_BANK_HEIGHT 0xFFFFF3FF
#define S_028C74_NUM_SAMPLES(x) (((x) & 0x07) << 12)
#define G_028C74_NUM_SAMPLES(x) (((x) >> 12) & 0x07)
#define C_028C74_NUM_SAMPLES 0xFFFF8FFF
@@ -8586,6 +10599,36 @@
#define S_028C74_FORCE_DST_ALPHA_1(x) (((x) & 0x1) << 17)
#define G_028C74_FORCE_DST_ALPHA_1(x) (((x) >> 17) & 0x1)
#define C_028C74_FORCE_DST_ALPHA_1 0xFFFDFFFF
+/* VI */
+#define R_028C78_CB_COLOR0_DCC_CONTROL 0x028C78
+#define S_028C78_OVERWRITE_COMBINER_DISABLE(x) (((x) & 0x1) << 0)
+#define G_028C78_OVERWRITE_COMBINER_DISABLE(x) (((x) >> 0) & 0x1)
+#define C_028C78_OVERWRITE_COMBINER_DISABLE 0xFFFFFFFE
+#define S_028C78_KEY_CLEAR_ENABLE(x) (((x) & 0x1) << 1)
+#define G_028C78_KEY_CLEAR_ENABLE(x) (((x) >> 1) & 0x1)
+#define C_028C78_KEY_CLEAR_ENABLE 0xFFFFFFFD
+#define S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(x) (((x) & 0x03) << 2)
+#define G_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(x) (((x) >> 2) & 0x03)
+#define C_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE 0xFFFFFFF3
+#define S_028C78_MIN_COMPRESSED_BLOCK_SIZE(x) (((x) & 0x1) << 4)
+#define G_028C78_MIN_COMPRESSED_BLOCK_SIZE(x) (((x) >> 4) & 0x1)
+#define C_028C78_MIN_COMPRESSED_BLOCK_SIZE 0xFFFFFFEF
+#define S_028C78_MAX_COMPRESSED_BLOCK_SIZE(x) (((x) & 0x03) << 5)
+#define G_028C78_MAX_COMPRESSED_BLOCK_SIZE(x) (((x) >> 5) & 0x03)
+#define C_028C78_MAX_COMPRESSED_BLOCK_SIZE 0xFFFFFF9F
+#define S_028C78_COLOR_TRANSFORM(x) (((x) & 0x03) << 7)
+#define G_028C78_COLOR_TRANSFORM(x) (((x) >> 7) & 0x03)
+#define C_028C78_COLOR_TRANSFORM 0xFFFFFE7F
+#define S_028C78_INDEPENDENT_64B_BLOCKS(x) (((x) & 0x1) << 9)
+#define G_028C78_INDEPENDENT_64B_BLOCKS(x) (((x) >> 9) & 0x1)
+#define C_028C78_INDEPENDENT_64B_BLOCKS 0xFFFFFDFF
+#define S_028C78_LOSSY_RGB_PRECISION(x) (((x) & 0x0F) << 10)
+#define G_028C78_LOSSY_RGB_PRECISION(x) (((x) >> 10) & 0x0F)
+#define C_028C78_LOSSY_RGB_PRECISION 0xFFFFC3FF
+#define S_028C78_LOSSY_ALPHA_PRECISION(x) (((x) & 0x0F) << 14)
+#define G_028C78_LOSSY_ALPHA_PRECISION(x) (((x) >> 14) & 0x0F)
+#define C_028C78_LOSSY_ALPHA_PRECISION 0xFFFC3FFF
+/* */
#define R_028C7C_CB_COLOR0_CMASK 0x028C7C
#define R_028C80_CB_COLOR0_CMASK_SLICE 0x028C80
#define S_028C80_TILE_MAX(x) (((x) & 0x3FFF) << 0)
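The VI-only CB_COLOR0_DCC_CONTROL fields above compose like every other field here. A hedged example of assembling a control word (values chosen for illustration only, not necessarily what radeonsi programs):

   uint32_t dcc_control =
      S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(2) |   /* bits 3:2 */
      S_028C78_MAX_COMPRESSED_BLOCK_SIZE(2) |     /* bits 6:5 */
      S_028C78_INDEPENDENT_64B_BLOCKS(1);         /* bit 9 */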
@@ -8598,90 +10641,105 @@
#define C_028C88_TILE_MAX 0xFFC00000
#define R_028C8C_CB_COLOR0_CLEAR_WORD0 0x028C8C
#define R_028C90_CB_COLOR0_CLEAR_WORD1 0x028C90
+#define R_028C94_CB_COLOR0_DCC_BASE 0x028C94 /* VI */
#define R_028C9C_CB_COLOR1_BASE 0x028C9C
#define R_028CA0_CB_COLOR1_PITCH 0x028CA0
#define R_028CA4_CB_COLOR1_SLICE 0x028CA4
#define R_028CA8_CB_COLOR1_VIEW 0x028CA8
#define R_028CAC_CB_COLOR1_INFO 0x028CAC
#define R_028CB0_CB_COLOR1_ATTRIB 0x028CB0
-#define R_028CD4_CB_COLOR1_CMASK 0x028CB8
+#define R_028CB4_CB_COLOR1_DCC_CONTROL 0x028CB4 /* VI */
+#define R_028CB8_CB_COLOR1_CMASK 0x028CB8
#define R_028CBC_CB_COLOR1_CMASK_SLICE 0x028CBC
#define R_028CC0_CB_COLOR1_FMASK 0x028CC0
#define R_028CC4_CB_COLOR1_FMASK_SLICE 0x028CC4
#define R_028CC8_CB_COLOR1_CLEAR_WORD0 0x028CC8
#define R_028CCC_CB_COLOR1_CLEAR_WORD1 0x028CCC
+#define R_028CD0_CB_COLOR1_DCC_BASE 0x028CD0 /* VI */
#define R_028CD8_CB_COLOR2_BASE 0x028CD8
#define R_028CDC_CB_COLOR2_PITCH 0x028CDC
#define R_028CE0_CB_COLOR2_SLICE 0x028CE0
#define R_028CE4_CB_COLOR2_VIEW 0x028CE4
#define R_028CE8_CB_COLOR2_INFO 0x028CE8
#define R_028CEC_CB_COLOR2_ATTRIB 0x028CEC
+#define R_028CF0_CB_COLOR2_DCC_CONTROL 0x028CF0 /* VI */
#define R_028CF4_CB_COLOR2_CMASK 0x028CF4
#define R_028CF8_CB_COLOR2_CMASK_SLICE 0x028CF8
#define R_028CFC_CB_COLOR2_FMASK 0x028CFC
#define R_028D00_CB_COLOR2_FMASK_SLICE 0x028D00
#define R_028D04_CB_COLOR2_CLEAR_WORD0 0x028D04
#define R_028D08_CB_COLOR2_CLEAR_WORD1 0x028D08
+#define R_028D0C_CB_COLOR2_DCC_BASE 0x028D0C /* VI */
#define R_028D14_CB_COLOR3_BASE 0x028D14
#define R_028D18_CB_COLOR3_PITCH 0x028D18
#define R_028D1C_CB_COLOR3_SLICE 0x028D1C
#define R_028D20_CB_COLOR3_VIEW 0x028D20
#define R_028D24_CB_COLOR3_INFO 0x028D24
#define R_028D28_CB_COLOR3_ATTRIB 0x028D28
+#define R_028D2C_CB_COLOR3_DCC_CONTROL 0x028D2C /* VI */
#define R_028D30_CB_COLOR3_CMASK 0x028D30
#define R_028D34_CB_COLOR3_CMASK_SLICE 0x028D34
#define R_028D38_CB_COLOR3_FMASK 0x028D38
#define R_028D3C_CB_COLOR3_FMASK_SLICE 0x028D3C
#define R_028D40_CB_COLOR3_CLEAR_WORD0 0x028D40
#define R_028D44_CB_COLOR3_CLEAR_WORD1 0x028D44
+#define R_028D48_CB_COLOR3_DCC_BASE 0x028D48 /* VI */
#define R_028D50_CB_COLOR4_BASE 0x028D50
#define R_028D54_CB_COLOR4_PITCH 0x028D54
#define R_028D58_CB_COLOR4_SLICE 0x028D58
#define R_028D5C_CB_COLOR4_VIEW 0x028D5C
#define R_028D60_CB_COLOR4_INFO 0x028D60
#define R_028D64_CB_COLOR4_ATTRIB 0x028D64
+#define R_028D68_CB_COLOR4_DCC_CONTROL 0x028D68 /* VI */
#define R_028D6C_CB_COLOR4_CMASK 0x028D6C
#define R_028D70_CB_COLOR4_CMASK_SLICE 0x028D70
#define R_028D74_CB_COLOR4_FMASK 0x028D74
#define R_028D78_CB_COLOR4_FMASK_SLICE 0x028D78
#define R_028D7C_CB_COLOR4_CLEAR_WORD0 0x028D7C
#define R_028D80_CB_COLOR4_CLEAR_WORD1 0x028D80
+#define R_028D84_CB_COLOR4_DCC_BASE 0x028D84 /* VI */
#define R_028D8C_CB_COLOR5_BASE 0x028D8C
#define R_028D90_CB_COLOR5_PITCH 0x028D90
#define R_028D94_CB_COLOR5_SLICE 0x028D94
#define R_028D98_CB_COLOR5_VIEW 0x028D98
#define R_028D9C_CB_COLOR5_INFO 0x028D9C
#define R_028DA0_CB_COLOR5_ATTRIB 0x028DA0
+#define R_028DA4_CB_COLOR5_DCC_CONTROL 0x028DA4 /* VI */
#define R_028DA8_CB_COLOR5_CMASK 0x028DA8
#define R_028DAC_CB_COLOR5_CMASK_SLICE 0x028DAC
#define R_028DB0_CB_COLOR5_FMASK 0x028DB0
#define R_028DB4_CB_COLOR5_FMASK_SLICE 0x028DB4
#define R_028DB8_CB_COLOR5_CLEAR_WORD0 0x028DB8
#define R_028DBC_CB_COLOR5_CLEAR_WORD1 0x028DBC
+#define R_028DC0_CB_COLOR5_DCC_BASE 0x028DC0 /* VI */
#define R_028DC8_CB_COLOR6_BASE 0x028DC8
#define R_028DCC_CB_COLOR6_PITCH 0x028DCC
#define R_028DD0_CB_COLOR6_SLICE 0x028DD0
#define R_028DD4_CB_COLOR6_VIEW 0x028DD4
#define R_028DD8_CB_COLOR6_INFO 0x028DD8
#define R_028DDC_CB_COLOR6_ATTRIB 0x028DDC
+#define R_028DE0_CB_COLOR6_DCC_CONTROL 0x028DE0 /* VI */
#define R_028DE4_CB_COLOR6_CMASK 0x028DE4
#define R_028DE8_CB_COLOR6_CMASK_SLICE 0x028DE8
#define R_028DEC_CB_COLOR6_FMASK 0x028DEC
#define R_028DF0_CB_COLOR6_FMASK_SLICE 0x028DF0
#define R_028DF4_CB_COLOR6_CLEAR_WORD0 0x028DF4
#define R_028DF8_CB_COLOR6_CLEAR_WORD1 0x028DF8
+#define R_028DFC_CB_COLOR6_DCC_BASE 0x028DFC /* VI */
#define R_028E04_CB_COLOR7_BASE 0x028E04
#define R_028E08_CB_COLOR7_PITCH 0x028E08
#define R_028E0C_CB_COLOR7_SLICE 0x028E0C
#define R_028E10_CB_COLOR7_VIEW 0x028E10
#define R_028E14_CB_COLOR7_INFO 0x028E14
#define R_028E18_CB_COLOR7_ATTRIB 0x028E18
+#define R_028E1C_CB_COLOR7_DCC_CONTROL 0x028E1C /* VI */
#define R_028E20_CB_COLOR7_CMASK 0x028E20
#define R_028E24_CB_COLOR7_CMASK_SLICE 0x028E24
#define R_028E28_CB_COLOR7_FMASK 0x028E28
#define R_028E2C_CB_COLOR7_FMASK_SLICE 0x028E2C
#define R_028E30_CB_COLOR7_CLEAR_WORD0 0x028E30
#define R_028E34_CB_COLOR7_CLEAR_WORD1 0x028E34
+#define R_028E38_CB_COLOR7_DCC_BASE 0x028E38 /* VI */
/* SI async DMA packets */
#define SI_DMA_PACKET(cmd, sub_cmd, n) ((((cmd) & 0xF) << 28) | \
diff --git a/src/gallium/drivers/rbug/rbug_context.h b/src/gallium/drivers/rbug/rbug_context.h
index 5e7b9d4dee4..e99f6edc523 100644
--- a/src/gallium/drivers/rbug/rbug_context.h
+++ b/src/gallium/drivers/rbug/rbug_context.h
@@ -79,7 +79,7 @@ struct rbug_context {
struct rbug_list shaders;
};
-static INLINE struct rbug_context *
+static inline struct rbug_context *
rbug_context(struct pipe_context *pipe)
{
return (struct rbug_context *)pipe;
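The rbug hunks from here on only swap the old INLINE portability macro for plain C99 inline; the cast wrappers themselves are unchanged. For reference, typical use (the pipe variable is illustrative):

   /* downcast the generic gallium context to rbug's wrapper type */
   struct rbug_context *rb_pipe = rbug_context(pipe);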
diff --git a/src/gallium/drivers/rbug/rbug_objects.h b/src/gallium/drivers/rbug/rbug_objects.h
index 3fba3334228..02973e07996 100644
--- a/src/gallium/drivers/rbug/rbug_objects.h
+++ b/src/gallium/drivers/rbug/rbug_objects.h
@@ -93,7 +93,7 @@ struct rbug_transfer
};
-static INLINE struct rbug_resource *
+static inline struct rbug_resource *
rbug_resource(struct pipe_resource *_resource)
{
if (!_resource)
@@ -102,7 +102,7 @@ rbug_resource(struct pipe_resource *_resource)
return (struct rbug_resource *)_resource;
}
-static INLINE struct rbug_sampler_view *
+static inline struct rbug_sampler_view *
rbug_sampler_view(struct pipe_sampler_view *_sampler_view)
{
if (!_sampler_view)
@@ -111,7 +111,7 @@ rbug_sampler_view(struct pipe_sampler_view *_sampler_view)
return (struct rbug_sampler_view *)_sampler_view;
}
-static INLINE struct rbug_surface *
+static inline struct rbug_surface *
rbug_surface(struct pipe_surface *_surface)
{
if (!_surface)
@@ -120,7 +120,7 @@ rbug_surface(struct pipe_surface *_surface)
return (struct rbug_surface *)_surface;
}
-static INLINE struct rbug_transfer *
+static inline struct rbug_transfer *
rbug_transfer(struct pipe_transfer *_transfer)
{
if (!_transfer)
@@ -129,7 +129,7 @@ rbug_transfer(struct pipe_transfer *_transfer)
return (struct rbug_transfer *)_transfer;
}
-static INLINE struct rbug_shader *
+static inline struct rbug_shader *
rbug_shader(void *_state)
{
if (!_state)
@@ -137,7 +137,7 @@ rbug_shader(void *_state)
return (struct rbug_shader *)_state;
}
-static INLINE struct pipe_resource *
+static inline struct pipe_resource *
rbug_resource_unwrap(struct pipe_resource *_resource)
{
if (!_resource)
@@ -145,7 +145,7 @@ rbug_resource_unwrap(struct pipe_resource *_resource)
return rbug_resource(_resource)->resource;
}
-static INLINE struct pipe_sampler_view *
+static inline struct pipe_sampler_view *
rbug_sampler_view_unwrap(struct pipe_sampler_view *_sampler_view)
{
if (!_sampler_view)
@@ -153,7 +153,7 @@ rbug_sampler_view_unwrap(struct pipe_sampler_view *_sampler_view)
return rbug_sampler_view(_sampler_view)->sampler_view;
}
-static INLINE struct pipe_surface *
+static inline struct pipe_surface *
rbug_surface_unwrap(struct pipe_surface *_surface)
{
if (!_surface)
@@ -161,7 +161,7 @@ rbug_surface_unwrap(struct pipe_surface *_surface)
return rbug_surface(_surface)->surface;
}
-static INLINE struct pipe_transfer *
+static inline struct pipe_transfer *
rbug_transfer_unwrap(struct pipe_transfer *_transfer)
{
if (!_transfer)
@@ -169,7 +169,7 @@ rbug_transfer_unwrap(struct pipe_transfer *_transfer)
return rbug_transfer(_transfer)->transfer;
}
-static INLINE void *
+static inline void *
rbug_shader_unwrap(void *_state)
{
struct rbug_shader *shader;
diff --git a/src/gallium/drivers/rbug/rbug_screen.c b/src/gallium/drivers/rbug/rbug_screen.c
index d5a3164e217..7da4e81560a 100644
--- a/src/gallium/drivers/rbug/rbug_screen.c
+++ b/src/gallium/drivers/rbug/rbug_screen.c
@@ -226,17 +226,6 @@ rbug_screen_fence_reference(struct pipe_screen *_screen,
}
static boolean
-rbug_screen_fence_signalled(struct pipe_screen *_screen,
- struct pipe_fence_handle *fence)
-{
- struct rbug_screen *rb_screen = rbug_screen(_screen);
- struct pipe_screen *screen = rb_screen->screen;
-
- return screen->fence_signalled(screen,
- fence);
-}
-
-static boolean
rbug_screen_fence_finish(struct pipe_screen *_screen,
struct pipe_fence_handle *fence,
uint64_t timeout)
@@ -288,7 +277,6 @@ rbug_screen_create(struct pipe_screen *screen)
rb_screen->base.resource_destroy = rbug_screen_resource_destroy;
rb_screen->base.flush_frontbuffer = rbug_screen_flush_frontbuffer;
rb_screen->base.fence_reference = rbug_screen_fence_reference;
- rb_screen->base.fence_signalled = rbug_screen_fence_signalled;
rb_screen->base.fence_finish = rbug_screen_fence_finish;
rb_screen->screen = screen;
diff --git a/src/gallium/drivers/rbug/rbug_screen.h b/src/gallium/drivers/rbug/rbug_screen.h
index a53afac05e9..fd92374beda 100644
--- a/src/gallium/drivers/rbug/rbug_screen.h
+++ b/src/gallium/drivers/rbug/rbug_screen.h
@@ -60,7 +60,7 @@ struct rbug_screen
struct rbug_list transfers;
};
-static INLINE struct rbug_screen *
+static inline struct rbug_screen *
rbug_screen(struct pipe_screen *screen)
{
return (struct rbug_screen *)screen;
diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h
index 50a73369c1d..577df814b29 100644
--- a/src/gallium/drivers/softpipe/sp_context.h
+++ b/src/gallium/drivers/softpipe/sp_context.h
@@ -203,7 +203,7 @@ struct softpipe_context {
};
-static INLINE struct softpipe_context *
+static inline struct softpipe_context *
softpipe_context( struct pipe_context *pipe )
{
return (struct softpipe_context *)pipe;
diff --git a/src/gallium/drivers/softpipe/sp_fence.c b/src/gallium/drivers/softpipe/sp_fence.c
index c2897ed1ef8..6168236ec96 100644
--- a/src/gallium/drivers/softpipe/sp_fence.c
+++ b/src/gallium/drivers/softpipe/sp_fence.c
@@ -41,15 +41,6 @@ softpipe_fence_reference(struct pipe_screen *screen,
static boolean
-softpipe_fence_signalled(struct pipe_screen *screen,
- struct pipe_fence_handle *fence)
-{
- assert(fence);
- return TRUE;
-}
-
-
-static boolean
softpipe_fence_finish(struct pipe_screen *screen,
struct pipe_fence_handle *fence,
uint64_t timeout)
@@ -64,5 +55,4 @@ softpipe_init_screen_fence_funcs(struct pipe_screen *screen)
{
screen->fence_reference = softpipe_fence_reference;
screen->fence_finish = softpipe_fence_finish;
- screen->fence_signalled = softpipe_fence_signalled;
}
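fence_signalled is removed here (and from rbug above) on the assumption that a zero-timeout fence_finish answers the same question without blocking; a caller would poll roughly like this (a sketch, using the fence_finish signature visible in the rbug hunk):

   /* non-blocking poll: a timeout of 0 returns the status immediately */
   boolean signalled = screen->fence_finish(screen, fence, 0);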
diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c
index 369ab6ed8d4..89411777ec9 100644
--- a/src/gallium/drivers/softpipe/sp_fs_exec.c
+++ b/src/gallium/drivers/softpipe/sp_fs_exec.c
@@ -52,7 +52,7 @@ struct sp_exec_fragment_shader
/** cast wrapper */
-static INLINE struct sp_exec_fragment_shader *
+static inline struct sp_exec_fragment_shader *
sp_exec_fragment_shader(const struct sp_fragment_shader_variant *var)
{
return (struct sp_exec_fragment_shader *) var;
diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
index 18eca611669..f8a3eacdb37 100644
--- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c
+++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
@@ -145,7 +145,7 @@ sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim)
}
-static INLINE cptrf4 get_vert( const void *vertex_buffer,
+static inline cptrf4 get_vert( const void *vertex_buffer,
int index,
int stride )
{
diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c
index a32bd7fd241..5b458450cd8 100644
--- a/src/gallium/drivers/softpipe/sp_quad_blend.c
+++ b/src/gallium/drivers/softpipe/sp_quad_blend.c
@@ -63,7 +63,7 @@ struct blend_quad_stage
/** cast wrapper */
-static INLINE struct blend_quad_stage *
+static inline struct blend_quad_stage *
blend_quad_stage(struct quad_stage *stage)
{
return (struct blend_quad_stage *) stage;
diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c
index 82c58d04527..395bc70f2cf 100644
--- a/src/gallium/drivers/softpipe/sp_quad_fs.c
+++ b/src/gallium/drivers/softpipe/sp_quad_fs.c
@@ -56,7 +56,7 @@ struct quad_shade_stage
/** cast wrapper */
-static INLINE struct quad_shade_stage *
+static inline struct quad_shade_stage *
quad_shade_stage(struct quad_stage *qs)
{
return (struct quad_shade_stage *) qs;
@@ -67,7 +67,7 @@ quad_shade_stage(struct quad_stage *qs)
* Execute fragment shader for the four fragments in the quad.
* \return TRUE if quad is alive, FALSE if all four pixels are killed
*/
-static INLINE boolean
+static inline boolean
shade_quad(struct quad_stage *qs, struct quad_header *quad)
{
struct softpipe_context *softpipe = qs->softpipe;
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index a688d319bb8..0bfd9c3578c 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -234,6 +234,8 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
return 1;
case PIPE_CAP_CLIP_HALFZ:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
return 1;
case PIPE_CAP_VERTEXID_NOBASE:
return 0;
@@ -242,6 +244,8 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
return 0;
}
/* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/softpipe/sp_screen.h b/src/gallium/drivers/softpipe/sp_screen.h
index d39e9f48e80..f0e929111c2 100644
--- a/src/gallium/drivers/softpipe/sp_screen.h
+++ b/src/gallium/drivers/softpipe/sp_screen.h
@@ -49,7 +49,7 @@ struct softpipe_screen {
boolean use_llvm;
};
-static INLINE struct softpipe_screen *
+static inline struct softpipe_screen *
softpipe_screen( struct pipe_screen *pipe )
{
return (struct softpipe_screen *)pipe;
diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c
index 6704015112b..ff3cb9fe5e1 100644
--- a/src/gallium/drivers/softpipe/sp_setup.c
+++ b/src/gallium/drivers/softpipe/sp_setup.c
@@ -125,7 +125,7 @@ struct setup_context {
/**
* Clip setup->quad against the scissor/surface bounds.
*/
-static INLINE void
+static inline void
quad_clip(struct setup_context *setup, struct quad_header *quad)
{
const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect;
@@ -156,7 +156,7 @@ quad_clip(struct setup_context *setup, struct quad_header *quad)
/**
* Emit a quad (pass to next stage) with clipping.
*/
-static INLINE void
+static inline void
clip_emit_quad(struct setup_context *setup, struct quad_header *quad)
{
quad_clip( setup, quad );
@@ -178,14 +178,14 @@ clip_emit_quad(struct setup_context *setup, struct quad_header *quad)
* Given an X or Y coordinate, return the block/quad coordinate that it
* belongs to.
*/
-static INLINE int
+static inline int
block(int x)
{
return x & ~(2-1);
}
-static INLINE int
+static inline int
block_x(int x)
{
return x & ~(16-1);
@@ -1039,7 +1039,7 @@ setup_line_coefficients(struct setup_context *setup,
/**
* Plot a pixel in a line segment.
*/
-static INLINE void
+static inline void
plot(struct setup_context *setup, int x, int y)
{
const int iy = y & 1;
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 1010b63de2c..565fca632c6 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -58,7 +58,7 @@
* of improperly weighted linear-filtered textures.
* The tests/texwrap.c demo is a good test.
*/
-static INLINE float
+static inline float
frac(float f)
{
return f - floorf(f);
@@ -69,7 +69,7 @@ frac(float f)
/**
* Linear interpolation macro
*/
-static INLINE float
+static inline float
lerp(float a, float v0, float v1)
{
return v0 + a * (v1 - v0);
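For context, the lerp_2d/lerp_3d helpers below are just nested applications of this 1D lerp; a sketch of the bilinear case (consistent with the comment calling it a good inlining candidate):

   /* bilinear: interpolate along x on both rows, then once along y */
   static inline float
   lerp_2d_sketch(float a, float b,
                  float v00, float v10, float v01, float v11)
   {
      const float temp0 = lerp(a, v00, v10);
      const float temp1 = lerp(a, v01, v11);
      return lerp(b, temp0, temp1);
   }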
@@ -84,7 +84,7 @@ lerp(float a, float v0, float v1)
* optimization! If we find that's not true on some systems, convert
* to a macro.
*/
-static INLINE float
+static inline float
lerp_2d(float a, float b,
float v00, float v10, float v01, float v11)
{
@@ -97,7 +97,7 @@ lerp_2d(float a, float b,
/**
* As above, but 3D interpolation of 8 values.
*/
-static INLINE float
+static inline float
lerp_3d(float a, float b, float c,
float v000, float v100, float v010, float v110,
float v001, float v101, float v011, float v111)
@@ -115,7 +115,7 @@ lerp_3d(float a, float b, float c,
* value. To avoid that problem we add a large multiple of the size
* (rather than using a conditional).
*/
-static INLINE int
+static inline int
repeat(int coord, unsigned size)
{
return (coord + size * 1024) % size;
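A concrete case of the bias trick above: adding a large multiple of size keeps the left operand of % positive, so no sign check is needed (valid while coord > -1024*size):

   /* repeat(-3, 8): (-3 + 8*1024) % 8 == 8189 % 8 == 5, the same texel
    * that true modular wrapping would select */
   assert(repeat(-3, 8) == 5);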
@@ -486,7 +486,7 @@ wrap_linear_unorm_clamp_to_edge(float s, unsigned size, int offset,
/**
* Do coordinate to array index conversion. For array textures.
*/
-static INLINE int
+static inline int
coord_to_layer(float coord, unsigned first_layer, unsigned last_layer)
{
int c = util_ifloor(coord + 0.5F);
@@ -587,7 +587,7 @@ compute_lambda_vert(const struct sp_sampler_view *sview,
-static INLINE const float *
+static inline const float *
get_texel_2d_no_border(const struct sp_sampler_view *sp_sview,
union tex_tile_address addr, int x, int y)
{
@@ -603,7 +603,7 @@ get_texel_2d_no_border(const struct sp_sampler_view *sp_sview,
}
-static INLINE const float *
+static inline const float *
get_texel_2d(const struct sp_sampler_view *sp_sview,
const struct sp_sampler *sp_samp,
union tex_tile_address addr, int x, int y)
@@ -695,7 +695,7 @@ static const unsigned face_array[PIPE_TEX_FACE_MAX][4] = {
PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y }
};
-static INLINE unsigned
+static inline unsigned
get_next_face(unsigned face, int idx)
{
return face_array[face][idx];
@@ -705,7 +705,7 @@ get_next_face(unsigned face, int idx)
* return a new xcoord based on old face, old coords, cube size
* and fall_off_index (0 for x-, 1 for x+, 2 for y-, 3 for y+)
*/
-static INLINE int
+static inline int
get_next_xcoord(unsigned face, unsigned fall_off_index, int max, int xc, int yc)
{
if ((face == 0 && fall_off_index != 1) ||
@@ -743,7 +743,7 @@ get_next_xcoord(unsigned face, unsigned fall_off_index, int max, int xc, int yc)
* return a new ycoord based on old face, old coords, cube size
* and fall_off_index (0 for x-, 1 for x+, 2 for y-, 3 for y+)
*/
-static INLINE int
+static inline int
get_next_ycoord(unsigned face, unsigned fall_off_index, int max, int xc, int yc)
{
if ((fall_off_index <= 1) && (face <= 1 || face >= 4)) {
@@ -771,7 +771,7 @@ get_next_ycoord(unsigned face, unsigned fall_off_index, int max, int xc, int yc)
/* Gather a quad of adjacent texels within a tile:
*/
-static INLINE void
+static inline void
get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_view *sp_sview,
union tex_tile_address addr,
unsigned x, unsigned y,
@@ -795,7 +795,7 @@ get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_view *sp_sview,
/* Gather a quad of potentially non-adjacent texels:
*/
-static INLINE void
+static inline void
get_texel_quad_2d_no_border(const struct sp_sampler_view *sp_sview,
union tex_tile_address addr,
int x0, int y0,
@@ -810,7 +810,7 @@ get_texel_quad_2d_no_border(const struct sp_sampler_view *sp_sview,
/* Can involve a lot of unnecessary checks for border color:
*/
-static INLINE void
+static inline void
get_texel_quad_2d(const struct sp_sampler_view *sp_sview,
const struct sp_sampler *sp_samp,
union tex_tile_address addr,
@@ -828,7 +828,7 @@ get_texel_quad_2d(const struct sp_sampler_view *sp_sview,
/* 3d variants:
*/
-static INLINE const float *
+static inline const float *
get_texel_3d_no_border(const struct sp_sampler_view *sp_sview,
union tex_tile_address addr, int x, int y, int z)
{
@@ -846,7 +846,7 @@ get_texel_3d_no_border(const struct sp_sampler_view *sp_sview,
}
-static INLINE const float *
+static inline const float *
get_texel_3d(const struct sp_sampler_view *sp_sview,
const struct sp_sampler *sp_samp,
union tex_tile_address addr, int x, int y, int z)
@@ -866,7 +866,7 @@ get_texel_3d(const struct sp_sampler_view *sp_sview,
/* Get texel pointer for 1D array texture */
-static INLINE const float *
+static inline const float *
get_texel_1d_array(const struct sp_sampler_view *sp_sview,
const struct sp_sampler *sp_samp,
union tex_tile_address addr, int x, int y)
@@ -884,7 +884,7 @@ get_texel_1d_array(const struct sp_sampler_view *sp_sview,
/* Get texel pointer for 2D array texture */
-static INLINE const float *
+static inline const float *
get_texel_2d_array(const struct sp_sampler_view *sp_sview,
const struct sp_sampler *sp_samp,
union tex_tile_address addr, int x, int y, int layer)
@@ -905,7 +905,7 @@ get_texel_2d_array(const struct sp_sampler_view *sp_sview,
}
-static INLINE const float *
+static inline const float *
get_texel_cube_seamless(const struct sp_sampler_view *sp_sview,
union tex_tile_address addr, int x, int y,
float *corner, int layer, unsigned face)
@@ -960,7 +960,7 @@ get_texel_cube_seamless(const struct sp_sampler_view *sp_sview,
/* Get texel pointer for cube array texture */
-static INLINE const float *
+static inline const float *
get_texel_cube_array(const struct sp_sampler_view *sp_sview,
const struct sp_sampler *sp_samp,
union tex_tile_address addr, int x, int y, int layer)
@@ -986,7 +986,7 @@ get_texel_cube_array(const struct sp_sampler_view *sp_sview,
* If level = 2, then we'll return 64 (the width at level=2).
* Return 1 if level > base_pot.
*/
-static INLINE unsigned
+static inline unsigned
pot_level_size(unsigned base_pot, unsigned level)
{
return (base_pot >= level) ? (1 << (base_pot - level)) : 1;
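To make the comment above concrete (standalone check; base_pot is the log2 of the base level's size, so base_pot == 8 corresponds to a 256-texel base):

#include <assert.h>

static inline unsigned
pot_level_size(unsigned base_pot, unsigned level)
{
   return (base_pot >= level) ? (1u << (base_pot - level)) : 1;
}

int main(void)
{
   assert(pot_level_size(8, 0) == 256);  /* base level */
   assert(pot_level_size(8, 2) == 64);   /* the case from the comment */
   assert(pot_level_size(8, 9) == 1);    /* past the chain: clamps to 1 */
   return 0;
}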
@@ -1016,7 +1016,7 @@ print_sample_4(const char *function, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZ
/* Some image-filter fastpaths:
*/
-static INLINE void
+static inline void
img_filter_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview,
struct sp_sampler *sp_samp,
const struct img_filter_args *args,
@@ -1070,7 +1070,7 @@ img_filter_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview,
}
-static INLINE void
+static inline void
img_filter_2d_nearest_repeat_POT(struct sp_sampler_view *sp_sview,
struct sp_sampler *sp_samp,
const struct img_filter_args *args,
@@ -1104,7 +1104,7 @@ img_filter_2d_nearest_repeat_POT(struct sp_sampler_view *sp_sview,
}
-static INLINE void
+static inline void
img_filter_2d_nearest_clamp_POT(struct sp_sampler_view *sp_sview,
struct sp_sampler *sp_samp,
const struct img_filter_args *args,
@@ -1819,7 +1819,7 @@ img_filter_3d_linear(struct sp_sampler_view *sp_sview,
* \param lod_in per-fragment lod_bias or explicit_lod.
* \param lod returns the per-fragment lod.
*/
-static INLINE void
+static inline void
compute_lod(const struct pipe_sampler_state *sampler,
enum tgsi_sampler_control control,
const float biased_lambda,
@@ -1859,7 +1859,7 @@ compute_lod(const struct pipe_sampler_state *sampler,
* \param lod_in per-fragment lod_bias or explicit_lod.
* \param lod results per-fragment lod.
*/
-static INLINE void
+static inline void
compute_lambda_lod(struct sp_sampler_view *sp_sview,
struct sp_sampler *sp_samp,
const float s[TGSI_QUAD_SIZE],
@@ -1906,7 +1906,7 @@ compute_lambda_lod(struct sp_sampler_view *sp_sview,
}
}
-static INLINE unsigned
+static inline unsigned
get_gather_component(const float lod_in[TGSI_QUAD_SIZE])
{
/* gather component is stored in lod_in slot as unsigned */
@@ -2789,7 +2789,7 @@ get_linear_wrap(unsigned mode)
/**
* Is swizzling needed for the given state key?
*/
-static INLINE bool
+static inline bool
any_swizzle(const struct pipe_sampler_view *view)
{
return (view->swizzle_r != PIPE_SWIZZLE_RED ||
diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
index 4a421a8f882..21f38b2f859 100644
--- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
+++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
@@ -185,7 +185,7 @@ sp_flush_tex_tile_cache(struct softpipe_tex_tile_cache *tc)
* This is basically a direct-map cache.
* XXX There's probably lots of ways in which we can improve this.
*/
-static INLINE uint
+static inline uint
tex_cache_pos( union tex_tile_address addr )
{
uint entry = (addr.bits.x +
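The direct-map scheme the comment describes costs one index computation and one tag compare per lookup; a miss simply evicts whatever occupied the slot. A minimal standalone sketch (NUM_ENTRIES and the hash are hypothetical, not softpipe's actual ones):

#include <stddef.h>

#define NUM_ENTRIES 50

struct cached_tile {
   unsigned addr;                 /* tag: the packed tile address */
   /* ... decoded texel data would live here ... */
};

static inline struct cached_tile *
lookup(struct cached_tile cache[NUM_ENTRIES], unsigned addr)
{
   struct cached_tile *slot = &cache[addr % NUM_ENTRIES];
   /* One slot per address class: a tag mismatch is a miss, and the
    * caller refills this same slot. */
   return slot->addr == addr ? slot : NULL;
}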
diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h
index 2233effc439..b7ad222d715 100644
--- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h
+++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h
@@ -127,7 +127,7 @@ extern const struct softpipe_tex_cached_tile *
sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc,
union tex_tile_address addr );
-static INLINE union tex_tile_address
+static inline union tex_tile_address
tex_tile_address( unsigned x,
unsigned y,
unsigned z,
@@ -147,7 +147,7 @@ tex_tile_address( unsigned x,
/* Quickly retrieve tile if it matches last lookup.
*/
-static INLINE const struct softpipe_tex_cached_tile *
+static inline const struct softpipe_tex_cached_tile *
sp_get_cached_tile_tex(struct softpipe_tex_tile_cache *tc,
union tex_tile_address addr )
{
diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h
index 1701bf574d9..fbf741a9c72 100644
--- a/src/gallium/drivers/softpipe/sp_texture.h
+++ b/src/gallium/drivers/softpipe/sp_texture.h
@@ -81,13 +81,13 @@ struct softpipe_transfer
/** cast wrappers */
-static INLINE struct softpipe_resource *
+static inline struct softpipe_resource *
softpipe_resource(struct pipe_resource *pt)
{
return (struct softpipe_resource *) pt;
}
-static INLINE struct softpipe_transfer *
+static inline struct softpipe_transfer *
softpipe_transfer(struct pipe_transfer *pt)
{
return (struct softpipe_transfer *) pt;
@@ -99,7 +99,7 @@ softpipe_transfer(struct pipe_transfer *pt)
* This is a short-cut instead of using map()/unmap(), which should
* probably be fixed.
*/
-static INLINE void *
+static inline void *
softpipe_resource_data(struct pipe_resource *pt)
{
if (!pt)
diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c
index b763f526e61..9cc8ac12525 100644
--- a/src/gallium/drivers/softpipe/sp_tile_cache.c
+++ b/src/gallium/drivers/softpipe/sp_tile_cache.c
@@ -52,7 +52,7 @@ sp_alloc_tile(struct softpipe_tile_cache *tc);
(((x) + (y) * 5 + (l) * 10) % NUM_ENTRIES)
-static INLINE int addr_to_clear_pos(union tile_address addr)
+static inline int addr_to_clear_pos(union tile_address addr)
{
int pos;
pos = addr.bits.layer * (MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE);
@@ -63,7 +63,7 @@ static INLINE int addr_to_clear_pos(union tile_address addr)
/**
* Is the tile at (x,y) in cleared state?
*/
-static INLINE uint
+static inline uint
is_clear_flag_set(const uint *bitvec, union tile_address addr, unsigned max)
{
int pos, bit;
@@ -77,7 +77,7 @@ is_clear_flag_set(const uint *bitvec, union tile_address addr, unsigned max)
/**
* Mark the tile at (x,y) as not cleared.
*/
-static INLINE void
+static inline void
clear_clear_flag(uint *bitvec, union tile_address addr, unsigned max)
{
int pos;
diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h
index 167e1ffcada..2c0bafad651 100644
--- a/src/gallium/drivers/softpipe/sp_tile_cache.h
+++ b/src/gallium/drivers/softpipe/sp_tile_cache.h
@@ -128,7 +128,7 @@ sp_find_cached_tile(struct softpipe_tile_cache *tc,
union tile_address addr );
-static INLINE union tile_address
+static inline union tile_address
tile_address( unsigned x,
unsigned y, unsigned layer )
{
@@ -143,7 +143,7 @@ tile_address( unsigned x,
/* Quickly retrieve tile if it matches last lookup.
*/
-static INLINE struct softpipe_cached_tile *
+static inline struct softpipe_cached_tile *
sp_get_cached_tile(struct softpipe_tile_cache *tc,
int x, int y, int layer )
{
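tile_address() packs the coordinates into a union so the whole cache key can be compared or hashed as a single integer; a sketch with assumed field widths (the real softpipe layout may differ):

union tile_address {
   struct {
      unsigned x:8;         /* tile, not texel, coordinates */
      unsigned y:8;
      unsigned layer:8;
      unsigned invalid:1;   /* marks an empty cache entry */
   } bits;
   unsigned value;          /* the whole key as one comparable word */
};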
diff --git a/src/gallium/drivers/svga/Makefile.am b/src/gallium/drivers/svga/Makefile.am
index e0a8cad7208..d46de95e4b4 100644
--- a/src/gallium/drivers/svga/Makefile.am
+++ b/src/gallium/drivers/svga/Makefile.am
@@ -20,8 +20,6 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
-AUTOMAKE_OPTIONS = subdir-objects
-
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
diff --git a/src/gallium/drivers/svga/SConscript b/src/gallium/drivers/svga/SConscript
index bb4d034f1eb..0ee624616f9 100644
--- a/src/gallium/drivers/svga/SConscript
+++ b/src/gallium/drivers/svga/SConscript
@@ -11,7 +11,6 @@ if env['suncc']:
if env['gcc'] or env['clang']:
env.Append(CPPDEFINES = [
'HAVE_STDINT_H',
- 'HAVE_SYS_TYPES_H',
])
env.Prepend(CPPPATH = [
diff --git a/src/gallium/drivers/svga/include/svga3d_shaderdefs.h b/src/gallium/drivers/svga/include/svga3d_shaderdefs.h
index 355edfdb702..5e00906ce36 100644
--- a/src/gallium/drivers/svga/include/svga3d_shaderdefs.h
+++ b/src/gallium/drivers/svga/include/svga3d_shaderdefs.h
@@ -507,7 +507,7 @@ static const uint32 SVGA3D_OUTPUT_REG_DEPTH_NUM_PS20 = 1;
*----------------------------------------------------------------------
*/
-static INLINE SVGA3dShaderRegType
+static inline SVGA3dShaderRegType
SVGA3dShaderGetRegType(uint32 token)
{
SVGA3dShaderSrcToken src;
diff --git a/src/gallium/drivers/svga/include/svga_overlay.h b/src/gallium/drivers/svga/include/svga_overlay.h
index 0f242dd402c..ccbf7912e6d 100644
--- a/src/gallium/drivers/svga/include/svga_overlay.h
+++ b/src/gallium/drivers/svga/include/svga_overlay.h
@@ -133,7 +133,7 @@ struct {
*----------------------------------------------------------------------
*/
-static INLINE Bool
+static inline Bool
VMwareVideoGetAttributes(const SVGAOverlayFormat format, // IN
uint32 *width, // IN / OUT
uint32 *height, // IN / OUT
diff --git a/src/gallium/drivers/svga/svga_cmd.c b/src/gallium/drivers/svga/svga_cmd.c
index 474b75c3c86..b271832171d 100644
--- a/src/gallium/drivers/svga/svga_cmd.c
+++ b/src/gallium/drivers/svga/svga_cmd.c
@@ -57,7 +57,7 @@
*----------------------------------------------------------------------
*/
-static INLINE void
+static inline void
surface_to_surfaceid(struct svga_winsys_context *swc, // IN
struct pipe_surface *surface, // IN
SVGA3dSurfaceImageId *id, // OUT
diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h
index 630f5f77d66..71f038df8c1 100644
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -485,20 +485,20 @@ svga_context_create(struct pipe_screen *screen,
* Inline conversion functions. These are better-typed than the
* macros used previously:
*/
-static INLINE struct svga_context *
+static inline struct svga_context *
svga_context( struct pipe_context *pipe )
{
return (struct svga_context *)pipe;
}
-static INLINE boolean
+static inline boolean
svga_have_gb_objects(const struct svga_context *svga)
{
return svga_screen(svga->pipe.screen)->sws->have_gb_objects;
}
-static INLINE boolean
+static inline boolean
svga_have_gb_dma(const struct svga_context *svga)
{
return svga_screen(svga->pipe.screen)->sws->have_gb_dma;
diff --git a/src/gallium/drivers/svga/svga_debug.h b/src/gallium/drivers/svga/svga_debug.h
index 3a3fcd8fae2..82c9b602d5d 100644
--- a/src/gallium/drivers/svga/svga_debug.h
+++ b/src/gallium/drivers/svga/svga_debug.h
@@ -53,7 +53,7 @@ extern int SVGA_DEBUG;
#define DBSTR(x) ""
#endif
-static INLINE void
+static inline void
SVGA_DBG( unsigned flag, const char *fmt, ... )
{
#ifdef DEBUG
diff --git a/src/gallium/drivers/svga/svga_draw_private.h b/src/gallium/drivers/svga/svga_draw_private.h
index 1b054038e9f..9ab87e8259a 100644
--- a/src/gallium/drivers/svga/svga_draw_private.h
+++ b/src/gallium/drivers/svga/svga_draw_private.h
@@ -57,7 +57,7 @@ static const unsigned svga_hw_prims =
* PIPE_PRIM_QUADS, PIPE_PRIM_QUAD_STRIP or PIPE_PRIM_POLYGON. We convert
* those to other types of primitives with index/translation code.
*/
-static INLINE unsigned
+static inline unsigned
svga_translate_prim(unsigned mode, unsigned vcount,unsigned *prim_count)
{
switch (mode) {
diff --git a/src/gallium/drivers/svga/svga_pipe_blend.c b/src/gallium/drivers/svga/svga_pipe_blend.c
index 594eec7166e..2890516c0cf 100644
--- a/src/gallium/drivers/svga/svga_pipe_blend.c
+++ b/src/gallium/drivers/svga/svga_pipe_blend.c
@@ -33,7 +33,7 @@
#include "svga_hw_reg.h"
-static INLINE unsigned
+static inline unsigned
svga_translate_blend_factor(unsigned factor)
{
switch (factor) {
@@ -58,7 +58,7 @@ svga_translate_blend_factor(unsigned factor)
}
}
-static INLINE unsigned
+static inline unsigned
svga_translate_blend_func(unsigned mode)
{
switch (mode) {
diff --git a/src/gallium/drivers/svga/svga_pipe_depthstencil.c b/src/gallium/drivers/svga/svga_pipe_depthstencil.c
index cb07dbe09a3..8db21fd7476 100644
--- a/src/gallium/drivers/svga/svga_pipe_depthstencil.c
+++ b/src/gallium/drivers/svga/svga_pipe_depthstencil.c
@@ -32,7 +32,7 @@
#include "svga_hw_reg.h"
-static INLINE unsigned
+static inline unsigned
svga_translate_compare_func(unsigned func)
{
switch (func) {
@@ -50,7 +50,7 @@ svga_translate_compare_func(unsigned func)
}
}
-static INLINE unsigned
+static inline unsigned
svga_translate_stencil_op(unsigned op)
{
switch (op) {
diff --git a/src/gallium/drivers/svga/svga_pipe_query.c b/src/gallium/drivers/svga/svga_pipe_query.c
index a97a9c46cf8..208a2cd14bf 100644
--- a/src/gallium/drivers/svga/svga_pipe_query.c
+++ b/src/gallium/drivers/svga/svga_pipe_query.c
@@ -59,7 +59,7 @@ struct svga_query {
/** cast wrapper */
-static INLINE struct svga_query *
+static inline struct svga_query *
svga_query( struct pipe_query *q )
{
return (struct svga_query *)q;
diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c
index 8a87bb467aa..effd490dd22 100644
--- a/src/gallium/drivers/svga/svga_pipe_sampler.c
+++ b/src/gallium/drivers/svga/svga_pipe_sampler.c
@@ -35,7 +35,7 @@
#include "svga_debug.h"
-static INLINE unsigned
+static inline unsigned
translate_wrap_mode(unsigned wrap)
{
switch (wrap) {
@@ -68,7 +68,7 @@ translate_wrap_mode(unsigned wrap)
}
}
-static INLINE unsigned translate_img_filter( unsigned filter )
+static inline unsigned translate_img_filter( unsigned filter )
{
switch (filter) {
case PIPE_TEX_FILTER_NEAREST: return SVGA3D_TEX_FILTER_NEAREST;
@@ -79,7 +79,7 @@ static INLINE unsigned translate_img_filter( unsigned filter )
}
}
-static INLINE unsigned translate_mip_filter( unsigned filter )
+static inline unsigned translate_mip_filter( unsigned filter )
{
switch (filter) {
case PIPE_TEX_MIPFILTER_NONE: return SVGA3D_TEX_FILTER_NONE;
diff --git a/src/gallium/drivers/svga/svga_resource_buffer.c b/src/gallium/drivers/svga/svga_resource_buffer.c
index d2c7762e7ff..13f85cddbd5 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer.c
+++ b/src/gallium/drivers/svga/svga_resource_buffer.c
@@ -45,7 +45,7 @@
* Vertex and index buffers need hardware backing. Constant buffers
* do not. No other types of buffers currently supported.
*/
-static INLINE boolean
+static inline boolean
svga_buffer_needs_hw_storage(unsigned usage)
{
return usage & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER);
diff --git a/src/gallium/drivers/svga/svga_resource_buffer.h b/src/gallium/drivers/svga/svga_resource_buffer.h
index 83b3d342aec..e838beb6661 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer.h
+++ b/src/gallium/drivers/svga/svga_resource_buffer.h
@@ -190,7 +190,7 @@ struct svga_buffer
};
-static INLINE struct svga_buffer *
+static inline struct svga_buffer *
svga_buffer(struct pipe_resource *buffer)
{
if (buffer) {
@@ -205,7 +205,7 @@ svga_buffer(struct pipe_resource *buffer)
* Returns TRUE for user buffers. We may
* decide to use an alternate upload path for these buffers.
*/
-static INLINE boolean
+static inline boolean
svga_buffer_is_user_buffer( struct pipe_resource *buffer )
{
if (buffer) {
@@ -219,7 +219,7 @@ svga_buffer_is_user_buffer( struct pipe_resource *buffer )
* Returns a pointer to a struct svga_winsys_screen given a
* struct svga_buffer.
*/
-static INLINE struct svga_winsys_screen *
+static inline struct svga_winsys_screen *
svga_buffer_winsys_screen(struct svga_buffer *sbuf)
{
return svga_screen(sbuf->b.b.screen)->sws;
@@ -230,7 +230,7 @@ svga_buffer_winsys_screen(struct svga_buffer *sbuf)
* Returns whether a buffer has hardware storage that is
* visible to the GPU.
*/
-static INLINE boolean
+static inline boolean
svga_buffer_has_hw_storage(struct svga_buffer *sbuf)
{
if (svga_buffer_winsys_screen(sbuf)->have_gb_objects)
@@ -242,7 +242,7 @@ svga_buffer_has_hw_storage(struct svga_buffer *sbuf)
/**
* Map the hardware storage of a buffer.
*/
-static INLINE void *
+static inline void *
svga_buffer_hw_storage_map(struct svga_context *svga,
struct svga_buffer *sbuf,
unsigned flags, boolean *retry)
@@ -259,7 +259,7 @@ svga_buffer_hw_storage_map(struct svga_context *svga,
/**
* Unmap the hardware storage of a buffer.
*/
-static INLINE void
+static inline void
svga_buffer_hw_storage_unmap(struct svga_context *svga,
struct svga_buffer *sbuf)
{
diff --git a/src/gallium/drivers/svga/svga_resource_texture.h b/src/gallium/drivers/svga/svga_resource_texture.h
index 1ff42fabab9..19dadfb8828 100644
--- a/src/gallium/drivers/svga/svga_resource_texture.h
+++ b/src/gallium/drivers/svga/svga_resource_texture.h
@@ -106,7 +106,7 @@ struct svga_transfer
};
-static INLINE struct svga_texture *svga_texture( struct pipe_resource *resource )
+static inline struct svga_texture *svga_texture( struct pipe_resource *resource )
{
struct svga_texture *tex = (struct svga_texture *)resource;
assert(tex == NULL || tex->b.vtbl == &svga_texture_vtbl);
@@ -114,7 +114,7 @@ static INLINE struct svga_texture *svga_texture( struct pipe_resource *resource
}
-static INLINE struct svga_transfer *
+static inline struct svga_transfer *
svga_transfer(struct pipe_transfer *transfer)
{
assert(transfer);
@@ -127,7 +127,7 @@ svga_transfer(struct pipe_transfer *transfer)
* This is used to track updates to textures when we draw into
* them via a surface.
*/
-static INLINE void
+static inline void
svga_age_texture_view(struct svga_texture *tex, unsigned level)
{
assert(level < Elements(tex->view_age));
@@ -138,7 +138,7 @@ svga_age_texture_view(struct svga_texture *tex, unsigned level)
/**
* Mark the given texture face/level as being defined.
*/
-static INLINE void
+static inline void
svga_define_texture_level(struct svga_texture *tex,
unsigned face,unsigned level)
{
@@ -148,7 +148,7 @@ svga_define_texture_level(struct svga_texture *tex,
}
-static INLINE bool
+static inline bool
svga_is_texture_level_defined(const struct svga_texture *tex,
unsigned face, unsigned level)
{
@@ -177,7 +177,7 @@ check_face_level(const struct svga_texture *tex,
}
-static INLINE void
+static inline void
svga_set_texture_rendered_to(struct svga_texture *tex,
unsigned face, unsigned level)
{
@@ -186,7 +186,7 @@ svga_set_texture_rendered_to(struct svga_texture *tex,
}
-static INLINE void
+static inline void
svga_clear_texture_rendered_to(struct svga_texture *tex,
unsigned face, unsigned level)
{
@@ -195,7 +195,7 @@ svga_clear_texture_rendered_to(struct svga_texture *tex,
}
-static INLINE boolean
+static inline boolean
svga_was_texture_rendered_to(const struct svga_texture *tex,
unsigned face, unsigned level)
{
diff --git a/src/gallium/drivers/svga/svga_sampler_view.h b/src/gallium/drivers/svga/svga_sampler_view.h
index 2087c1be85e..7f14323f84f 100644
--- a/src/gallium/drivers/svga/svga_sampler_view.h
+++ b/src/gallium/drivers/svga/svga_sampler_view.h
@@ -86,7 +86,7 @@ svga_destroy_sampler_view_priv(struct svga_sampler_view *v);
void
svga_debug_describe_sampler_view(char *buf, const struct svga_sampler_view *sv);
-static INLINE void
+static inline void
svga_sampler_view_reference(struct svga_sampler_view **ptr, struct svga_sampler_view *v)
{
struct svga_sampler_view *old = *ptr;
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index 56e486786df..66c3deaa9e7 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -309,6 +309,10 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_UMA:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
return 0;
}
@@ -443,7 +447,9 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
return 0;
case PIPE_SHADER_GEOMETRY:
case PIPE_SHADER_COMPUTE:
- /* no support for geometry or compute shaders at this time */
+ case PIPE_SHADER_TESS_CTRL:
+ case PIPE_SHADER_TESS_EVAL:
+ /* no support for geometry, tess or compute shaders at this time */
return 0;
default:
debug_printf("Unexpected shader type (%u) query\n", shader);
@@ -543,21 +549,15 @@ svga_fence_reference(struct pipe_screen *screen,
static boolean
-svga_fence_signalled(struct pipe_screen *screen,
- struct pipe_fence_handle *fence)
-{
- struct svga_winsys_screen *sws = svga_screen(screen)->sws;
- return sws->fence_signalled(sws, fence, 0) == 0;
-}
-
-
-static boolean
svga_fence_finish(struct pipe_screen *screen,
struct pipe_fence_handle *fence,
uint64_t timeout)
{
struct svga_winsys_screen *sws = svga_screen(screen)->sws;
+ if (!timeout)
+ return sws->fence_signalled(sws, fence, 0) == 0;
+
SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "%s fence_ptr %p\n",
__FUNCTION__, fence);
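The consolidation pattern used here, as a standalone sketch (poll_fence and wait_fence are hypothetical stand-ins for the winsys calls): a timeout of zero turns fence_finish() into exactly the non-blocking query that the deleted fence_signalled() hook used to provide.

#include <stdbool.h>
#include <stdint.h>

extern bool poll_fence(void *fence);   /* hypothetical non-blocking test */
extern bool wait_fence(void *fence);   /* hypothetical blocking wait */

bool
fence_finish(void *fence, uint64_t timeout)
{
   if (!timeout)
      return poll_fence(fence);   /* the old fence_signalled() behavior */
   return wait_fence(fence);
}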
@@ -645,7 +645,6 @@ svga_screen_create(struct svga_winsys_screen *sws)
screen->is_format_supported = svga_is_format_supported;
screen->context_create = svga_context_create;
screen->fence_reference = svga_fence_reference;
- screen->fence_signalled = svga_fence_signalled;
screen->fence_finish = svga_fence_finish;
screen->get_driver_query_info = svga_get_driver_query_info;
svgascreen->sws = sws;
diff --git a/src/gallium/drivers/svga/svga_screen.h b/src/gallium/drivers/svga/svga_screen.h
index b85191c4b26..ea1e743dfe5 100644
--- a/src/gallium/drivers/svga/svga_screen.h
+++ b/src/gallium/drivers/svga/svga_screen.h
@@ -82,7 +82,7 @@ struct svga_screen
#ifndef DEBUG
/** cast wrapper */
-static INLINE struct svga_screen *
+static inline struct svga_screen *
svga_screen(struct pipe_screen *pscreen)
{
return (struct svga_screen *) pscreen;
diff --git a/src/gallium/drivers/svga/svga_screen_cache.c b/src/gallium/drivers/svga/svga_screen_cache.c
index f63f7836187..3c765394a88 100644
--- a/src/gallium/drivers/svga/svga_screen_cache.c
+++ b/src/gallium/drivers/svga/svga_screen_cache.c
@@ -76,7 +76,7 @@ surface_size(const struct svga_host_surface_cache_key *key)
/**
* Compute the bucket for this key.
*/
-static INLINE unsigned
+static inline unsigned
svga_screen_cache_bucket(const struct svga_host_surface_cache_key *key)
{
return util_hash_crc32(key, sizeof *key) % SVGA_HOST_SURFACE_CACHE_BUCKETS;
diff --git a/src/gallium/drivers/svga/svga_shader.h b/src/gallium/drivers/svga/svga_shader.h
index fd500ae4401..5102159b96a 100644
--- a/src/gallium/drivers/svga/svga_shader.h
+++ b/src/gallium/drivers/svga/svga_shader.h
@@ -44,7 +44,7 @@ svga_destroy_shader_variant(struct svga_context *svga,
/**
* Check if a shader's bytecode exceeds the device limits.
*/
-static INLINE boolean
+static inline boolean
svga_shader_too_large(const struct svga_context *svga,
const struct svga_shader_variant *variant)
{
diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c
index 566a79407e5..8cdce742b3b 100644
--- a/src/gallium/drivers/svga/svga_state_fs.c
+++ b/src/gallium/drivers/svga/svga_state_fs.c
@@ -41,7 +41,7 @@
-static INLINE int
+static inline int
compare_fs_keys(const struct svga_fs_compile_key *a,
const struct svga_fs_compile_key *b)
{
diff --git a/src/gallium/drivers/svga/svga_state_rss.c b/src/gallium/drivers/svga/svga_state_rss.c
index fb56b3d36ba..ebb98373e2b 100644
--- a/src/gallium/drivers/svga/svga_state_rss.c
+++ b/src/gallium/drivers/svga/svga_state_rss.c
@@ -61,7 +61,7 @@ do { \
} while (0)
-static INLINE void
+static inline void
svga_queue_rs( struct rs_queue *q,
unsigned rss,
unsigned value )
diff --git a/src/gallium/drivers/svga/svga_state_tss.c b/src/gallium/drivers/svga/svga_state_tss.c
index 0ab571c0588..41334bd7cb9 100644
--- a/src/gallium/drivers/svga/svga_state_tss.c
+++ b/src/gallium/drivers/svga/svga_state_tss.c
@@ -274,7 +274,7 @@ do { \
} while (0)
-static INLINE void
+static inline void
svga_queue_tss( struct ts_queue *q,
unsigned unit,
unsigned tss,
diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c
index 545c9d7420f..c2a0f1ee6b1 100644
--- a/src/gallium/drivers/svga/svga_state_vs.c
+++ b/src/gallium/drivers/svga/svga_state_vs.c
@@ -41,7 +41,7 @@
#include "svga_hw_reg.h"
-static INLINE int
+static inline int
compare_vs_keys(const struct svga_vs_compile_key *a,
const struct svga_vs_compile_key *b)
{
diff --git a/src/gallium/drivers/svga/svga_surface.h b/src/gallium/drivers/svga/svga_surface.h
index 7b8f6f018d2..2fa72a1c8f0 100644
--- a/src/gallium/drivers/svga/svga_surface.h
+++ b/src/gallium/drivers/svga/svga_surface.h
@@ -84,7 +84,7 @@ svga_texture_copy_handle(struct svga_context *svga,
unsigned width, unsigned height, unsigned depth);
-static INLINE struct svga_surface *
+static inline struct svga_surface *
svga_surface(struct pipe_surface *surface)
{
assert(surface);
@@ -92,7 +92,7 @@ svga_surface(struct pipe_surface *surface)
}
-static INLINE const struct svga_surface *
+static inline const struct svga_surface *
svga_surface_const(const struct pipe_surface *surface)
{
assert(surface);
diff --git a/src/gallium/drivers/svga/svga_swtnl_private.h b/src/gallium/drivers/svga/svga_swtnl_private.h
index 608950d7af6..e2106e1e8e6 100644
--- a/src/gallium/drivers/svga/svga_swtnl_private.h
+++ b/src/gallium/drivers/svga/svga_swtnl_private.h
@@ -76,7 +76,7 @@ struct svga_vbuf_render {
/**
* Basically a cast wrapper.
*/
-static INLINE struct svga_vbuf_render *
+static inline struct svga_vbuf_render *
svga_vbuf_render( struct vbuf_render *render )
{
assert(render);
diff --git a/src/gallium/drivers/svga/svga_tgsi.c b/src/gallium/drivers/svga/svga_tgsi.c
index 9aafd851264..2e2ff5e4673 100644
--- a/src/gallium/drivers/svga/svga_tgsi.c
+++ b/src/gallium/drivers/svga/svga_tgsi.c
@@ -84,7 +84,7 @@ svga_shader_expand(struct svga_shader_emitter *emit)
}
-static INLINE boolean
+static inline boolean
reserve(struct svga_shader_emitter *emit, unsigned nr_dwords)
{
if (emit->ptr - emit->buf + nr_dwords * sizeof(unsigned) >= emit->size) {
diff --git a/src/gallium/drivers/svga/svga_tgsi.h b/src/gallium/drivers/svga/svga_tgsi.h
index e7a2a134ca5..5c47a4ad39f 100644
--- a/src/gallium/drivers/svga/svga_tgsi.h
+++ b/src/gallium/drivers/svga/svga_tgsi.h
@@ -124,7 +124,7 @@ struct svga_shader_variant
* The real use of this information is matching vertex elements to
* fragment shader inputs in the case where vertex shader is disabled.
*/
-static INLINE void svga_generate_vdecl_semantics( unsigned idx,
+static inline void svga_generate_vdecl_semantics( unsigned idx,
unsigned *usage,
unsigned *usage_index )
{
@@ -140,12 +140,12 @@ static INLINE void svga_generate_vdecl_semantics( unsigned idx,
-static INLINE unsigned svga_vs_key_size( const struct svga_vs_compile_key *key )
+static inline unsigned svga_vs_key_size( const struct svga_vs_compile_key *key )
{
return sizeof *key;
}
-static INLINE unsigned svga_fs_key_size( const struct svga_fs_compile_key *key )
+static inline unsigned svga_fs_key_size( const struct svga_fs_compile_key *key )
{
return (const char *)&key->tex[key->num_textures] - (const char *)key;
}
diff --git a/src/gallium/drivers/svga/svga_tgsi_emit.h b/src/gallium/drivers/svga/svga_tgsi_emit.h
index 1894296e6d7..1a1dac23507 100644
--- a/src/gallium/drivers/svga/svga_tgsi_emit.h
+++ b/src/gallium/drivers/svga/svga_tgsi_emit.h
@@ -167,7 +167,7 @@ svga_translate_decl_sm30(struct svga_shader_emitter *emit,
/** Emit the given SVGA3dShaderInstToken opcode */
-static INLINE boolean
+static inline boolean
emit_instruction(struct svga_shader_emitter *emit,
SVGA3dShaderInstToken opcode)
{
@@ -176,7 +176,7 @@ emit_instruction(struct svga_shader_emitter *emit,
/** Generate a SVGA3dShaderInstToken for the given SVGA3D shader opcode */
-static INLINE SVGA3dShaderInstToken
+static inline SVGA3dShaderInstToken
inst_token(unsigned opcode)
{
SVGA3dShaderInstToken inst;
@@ -192,7 +192,7 @@ inst_token(unsigned opcode)
* Generate a SVGA3dShaderInstToken for the given SVGA3D shader opcode
* with the predication flag set.
*/
-static INLINE SVGA3dShaderInstToken
+static inline SVGA3dShaderInstToken
inst_token_predicated(unsigned opcode)
{
SVGA3dShaderInstToken inst;
@@ -209,7 +209,7 @@ inst_token_predicated(unsigned opcode)
* Generate a SVGA3dShaderInstToken for a SETP instruction (set predicate)
* using the given comparison operator (one of SVGA3DOPCOMP_xx).
*/
-static INLINE SVGA3dShaderInstToken
+static inline SVGA3dShaderInstToken
inst_token_setp(unsigned operator)
{
SVGA3dShaderInstToken inst;
@@ -227,7 +227,7 @@ inst_token_setp(unsigned operator)
* Note that this function is used to create tokens for output registers,
* temp registers AND constants (see emit_def_const()).
*/
-static INLINE SVGA3dShaderDestToken
+static inline SVGA3dShaderDestToken
dst_register(unsigned file, int number)
{
SVGA3dShaderDestToken dest;
@@ -255,7 +255,7 @@ dst_register(unsigned file, int number)
* Apply a writemask to the given SVGA3dShaderDestToken, returning a
* new SVGA3dShaderDestToken.
*/
-static INLINE SVGA3dShaderDestToken
+static inline SVGA3dShaderDestToken
writemask(SVGA3dShaderDestToken dest, unsigned mask)
{
assert(dest.mask & mask);
@@ -265,7 +265,7 @@ writemask(SVGA3dShaderDestToken dest, unsigned mask)
/** Create a SVGA3dShaderSrcToken given a register file and number */
-static INLINE SVGA3dShaderSrcToken
+static inline SVGA3dShaderSrcToken
src_token(unsigned file, int number)
{
SVGA3dShaderSrcToken src;
@@ -289,7 +289,7 @@ src_token(unsigned file, int number)
/** Create a src_register given a register file and register number */
-static INLINE struct src_register
+static inline struct src_register
src_register(unsigned file, int number)
{
struct src_register src;
@@ -301,7 +301,7 @@ src_register(unsigned file, int number)
}
/** Translate src_register into SVGA3dShaderDestToken */
-static INLINE SVGA3dShaderDestToken
+static inline SVGA3dShaderDestToken
dst(struct src_register src)
{
return dst_register(SVGA3dShaderGetRegType(src.base.value), src.base.num);
@@ -309,7 +309,7 @@ dst(struct src_register src)
/** Translate SVGA3dShaderDestToken to a src_register */
-static INLINE struct src_register
+static inline struct src_register
src(SVGA3dShaderDestToken dst)
{
return src_register(SVGA3dShaderGetRegType(dst.value), dst.num);
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader.h b/src/gallium/drivers/svga/svgadump/svga_shader.h
index 5db64bf135b..0a2e3d5f345 100644
--- a/src/gallium/drivers/svga/svgadump/svga_shader.h
+++ b/src/gallium/drivers/svga/svgadump/svga_shader.h
@@ -56,7 +56,7 @@ struct sh_reg
unsigned is_reg:1;
};
-static INLINE unsigned
+static inline unsigned
sh_reg_type( struct sh_reg reg )
{
return reg.type_lo | (reg.type_hi << 3);
@@ -138,7 +138,7 @@ struct sh_dstreg
unsigned is_reg:1;
};
-static INLINE unsigned
+static inline unsigned
sh_dstreg_type( struct sh_dstreg reg )
{
return reg.type_lo | (reg.type_hi << 3);
@@ -169,7 +169,7 @@ struct sh_srcreg
unsigned is_reg:1;
};
-static INLINE unsigned
+static inline unsigned
sh_srcreg_type( struct sh_srcreg reg )
{
return reg.type_lo | (reg.type_hi << 3);
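A side note on the three *_type() helpers in this header: the SM3 token layout splits the register type across two non-adjacent bit ranges, and these helpers stitch the pieces back together. Standalone worked example (the field widths are assumptions mirroring the shift by 3):

#include <assert.h>

struct sh_reg_sketch {
   unsigned type_lo:3;   /* low three bits of the type */
   unsigned type_hi:2;   /* high bits, stored elsewhere in the token */
};

int main(void)
{
   struct sh_reg_sketch reg = { .type_lo = 5, .type_hi = 1 };
   assert((reg.type_lo | (reg.type_hi << 3)) == 13);   /* 5 + 8 */
   return 0;
}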
diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index 0013c963e7a..7f6d0645112 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -49,13 +49,13 @@ struct trace_query
};
-static INLINE struct trace_query *
+static inline struct trace_query *
trace_query(struct pipe_query *query) {
return (struct trace_query *)query;
}
-static INLINE struct pipe_query *
+static inline struct pipe_query *
trace_query_unwrap(struct pipe_query *query)
{
if (query) {
@@ -66,7 +66,7 @@ trace_query_unwrap(struct pipe_query *query)
}
-static INLINE struct pipe_resource *
+static inline struct pipe_resource *
trace_resource_unwrap(struct trace_context *tr_ctx,
struct pipe_resource *resource)
{
@@ -82,7 +82,7 @@ trace_resource_unwrap(struct trace_context *tr_ctx,
}
-static INLINE struct pipe_surface *
+static inline struct pipe_surface *
trace_surface_unwrap(struct trace_context *tr_ctx,
struct pipe_surface *surface)
{
@@ -105,7 +105,7 @@ trace_surface_unwrap(struct trace_context *tr_ctx,
}
-static INLINE void
+static inline void
trace_context_draw_vbo(struct pipe_context *_pipe,
const struct pipe_draw_info *info)
{
@@ -125,7 +125,7 @@ trace_context_draw_vbo(struct pipe_context *_pipe,
}
-static INLINE struct pipe_query *
+static inline struct pipe_query *
trace_context_create_query(struct pipe_context *_pipe,
unsigned query_type,
unsigned index)
@@ -163,7 +163,7 @@ trace_context_create_query(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_destroy_query(struct pipe_context *_pipe,
struct pipe_query *_query)
{
@@ -185,7 +185,7 @@ trace_context_destroy_query(struct pipe_context *_pipe,
}
-static INLINE boolean
+static inline boolean
trace_context_begin_query(struct pipe_context *_pipe,
struct pipe_query *query)
{
@@ -207,7 +207,7 @@ trace_context_begin_query(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_end_query(struct pipe_context *_pipe,
struct pipe_query *query)
{
@@ -227,7 +227,7 @@ trace_context_end_query(struct pipe_context *_pipe,
}
-static INLINE boolean
+static inline boolean
trace_context_get_query_result(struct pipe_context *_pipe,
struct pipe_query *_query,
boolean wait,
@@ -262,7 +262,7 @@ trace_context_get_query_result(struct pipe_context *_pipe,
}
-static INLINE void *
+static inline void *
trace_context_create_blend_state(struct pipe_context *_pipe,
const struct pipe_blend_state *state)
{
@@ -285,7 +285,7 @@ trace_context_create_blend_state(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_bind_blend_state(struct pipe_context *_pipe,
void *state)
{
@@ -303,7 +303,7 @@ trace_context_bind_blend_state(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_delete_blend_state(struct pipe_context *_pipe,
void *state)
{
@@ -321,7 +321,7 @@ trace_context_delete_blend_state(struct pipe_context *_pipe,
}
-static INLINE void *
+static inline void *
trace_context_create_sampler_state(struct pipe_context *_pipe,
const struct pipe_sampler_state *state)
{
@@ -344,7 +344,7 @@ trace_context_create_sampler_state(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_bind_sampler_states(struct pipe_context *_pipe,
unsigned shader,
unsigned start,
@@ -371,7 +371,7 @@ trace_context_bind_sampler_states(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_delete_sampler_state(struct pipe_context *_pipe,
void *state)
{
@@ -389,7 +389,7 @@ trace_context_delete_sampler_state(struct pipe_context *_pipe,
}
-static INLINE void *
+static inline void *
trace_context_create_rasterizer_state(struct pipe_context *_pipe,
const struct pipe_rasterizer_state *state)
{
@@ -412,7 +412,7 @@ trace_context_create_rasterizer_state(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_bind_rasterizer_state(struct pipe_context *_pipe,
void *state)
{
@@ -430,7 +430,7 @@ trace_context_bind_rasterizer_state(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_delete_rasterizer_state(struct pipe_context *_pipe,
void *state)
{
@@ -448,7 +448,7 @@ trace_context_delete_rasterizer_state(struct pipe_context *_pipe,
}
-static INLINE void *
+static inline void *
trace_context_create_depth_stencil_alpha_state(struct pipe_context *_pipe,
const struct pipe_depth_stencil_alpha_state *state)
{
@@ -471,7 +471,7 @@ trace_context_create_depth_stencil_alpha_state(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_bind_depth_stencil_alpha_state(struct pipe_context *_pipe,
void *state)
{
@@ -489,7 +489,7 @@ trace_context_bind_depth_stencil_alpha_state(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe,
void *state)
{
@@ -508,7 +508,7 @@ trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe,
#define TRACE_SHADER_STATE(shader_type) \
- static INLINE void * \
+ static inline void * \
trace_context_create_##shader_type##_state(struct pipe_context *_pipe, \
const struct pipe_shader_state *state) \
{ \
@@ -524,7 +524,7 @@ trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe,
return result; \
} \
\
- static INLINE void \
+ static inline void \
trace_context_bind_##shader_type##_state(struct pipe_context *_pipe, \
void *state) \
{ \
@@ -537,7 +537,7 @@ trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe,
trace_dump_call_end(); \
} \
\
- static INLINE void \
+ static inline void \
trace_context_delete_##shader_type##_state(struct pipe_context *_pipe, \
void *state) \
{ \
@@ -559,7 +559,7 @@ TRACE_SHADER_STATE(tes)
#undef TRACE_SHADER_STATE
-static INLINE void *
+static inline void *
trace_context_create_vertex_elements_state(struct pipe_context *_pipe,
unsigned num_elements,
const struct pipe_vertex_element *elements)
@@ -587,7 +587,7 @@ trace_context_create_vertex_elements_state(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_bind_vertex_elements_state(struct pipe_context *_pipe,
void *state)
{
@@ -605,7 +605,7 @@ trace_context_bind_vertex_elements_state(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_delete_vertex_elements_state(struct pipe_context *_pipe,
void *state)
{
@@ -623,7 +623,7 @@ trace_context_delete_vertex_elements_state(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_set_blend_color(struct pipe_context *_pipe,
const struct pipe_blend_color *state)
{
@@ -641,7 +641,7 @@ trace_context_set_blend_color(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_set_stencil_ref(struct pipe_context *_pipe,
const struct pipe_stencil_ref *state)
{
@@ -659,7 +659,7 @@ trace_context_set_stencil_ref(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_set_clip_state(struct pipe_context *_pipe,
const struct pipe_clip_state *state)
{
@@ -676,7 +676,7 @@ trace_context_set_clip_state(struct pipe_context *_pipe,
trace_dump_call_end();
}
-static INLINE void
+static inline void
trace_context_set_sample_mask(struct pipe_context *_pipe,
unsigned sample_mask)
{
@@ -693,7 +693,7 @@ trace_context_set_sample_mask(struct pipe_context *_pipe,
trace_dump_call_end();
}
-static INLINE void
+static inline void
trace_context_set_constant_buffer(struct pipe_context *_pipe,
uint shader, uint index,
struct pipe_constant_buffer *constant_buffer)
@@ -721,7 +721,7 @@ trace_context_set_constant_buffer(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_set_framebuffer_state(struct pipe_context *_pipe,
const struct pipe_framebuffer_state *state)
{
@@ -751,7 +751,7 @@ trace_context_set_framebuffer_state(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_set_polygon_stipple(struct pipe_context *_pipe,
const struct pipe_poly_stipple *state)
{
@@ -769,7 +769,7 @@ trace_context_set_polygon_stipple(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_set_scissor_states(struct pipe_context *_pipe,
unsigned start_slot,
unsigned num_scissors,
@@ -791,7 +791,7 @@ trace_context_set_scissor_states(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_set_viewport_states(struct pipe_context *_pipe,
unsigned start_slot,
unsigned num_viewports,
@@ -938,7 +938,7 @@ trace_context_surface_destroy(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_set_sampler_views(struct pipe_context *_pipe,
unsigned shader,
unsigned start,
@@ -974,7 +974,7 @@ trace_context_set_sampler_views(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_set_vertex_buffers(struct pipe_context *_pipe,
unsigned start_slot, unsigned num_buffers,
const struct pipe_vertex_buffer *buffers)
@@ -1008,7 +1008,7 @@ trace_context_set_vertex_buffers(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_set_index_buffer(struct pipe_context *_pipe,
const struct pipe_index_buffer *ib)
{
@@ -1033,7 +1033,7 @@ trace_context_set_index_buffer(struct pipe_context *_pipe,
}
-static INLINE struct pipe_stream_output_target *
+static inline struct pipe_stream_output_target *
trace_context_create_stream_output_target(struct pipe_context *_pipe,
struct pipe_resource *res,
unsigned buffer_offset,
@@ -1063,7 +1063,7 @@ trace_context_create_stream_output_target(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_stream_output_target_destroy(
struct pipe_context *_pipe,
struct pipe_stream_output_target *target)
@@ -1082,7 +1082,7 @@ trace_context_stream_output_target_destroy(
}
-static INLINE void
+static inline void
trace_context_set_stream_output_targets(struct pipe_context *_pipe,
unsigned num_targets,
struct pipe_stream_output_target **tgs,
@@ -1104,7 +1104,7 @@ trace_context_set_stream_output_targets(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_resource_copy_region(struct pipe_context *_pipe,
struct pipe_resource *dst,
unsigned dst_level,
@@ -1139,7 +1139,7 @@ trace_context_resource_copy_region(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_blit(struct pipe_context *_pipe,
const struct pipe_blit_info *_info)
{
@@ -1181,7 +1181,7 @@ trace_context_flush_resource(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_clear(struct pipe_context *_pipe,
unsigned buffers,
const union pipe_color_union *color,
@@ -1210,7 +1210,7 @@ trace_context_clear(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_clear_render_target(struct pipe_context *_pipe,
struct pipe_surface *dst,
const union pipe_color_union *color,
@@ -1237,7 +1237,7 @@ trace_context_clear_render_target(struct pipe_context *_pipe,
trace_dump_call_end();
}
-static INLINE void
+static inline void
trace_context_clear_depth_stencil(struct pipe_context *_pipe,
struct pipe_surface *dst,
unsigned clear_flags,
@@ -1269,7 +1269,7 @@ trace_context_clear_depth_stencil(struct pipe_context *_pipe,
trace_dump_call_end();
}
-static INLINE void
+static inline void
trace_context_flush(struct pipe_context *_pipe,
struct pipe_fence_handle **fence,
unsigned flags)
@@ -1291,7 +1291,7 @@ trace_context_flush(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_destroy(struct pipe_context *_pipe)
{
struct trace_context *tr_ctx = trace_context(_pipe);
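Every wrapper converted in this file follows the same shape, visible in the hunks above: unwrap the traced object, dump the call, forward to the real driver, dump the result. A condensed sketch (set_foo and its hook are hypothetical):

static inline void
trace_context_set_foo(struct pipe_context *_pipe, unsigned value)
{
   struct trace_context *tr_ctx = trace_context(_pipe);
   struct pipe_context *pipe = tr_ctx->pipe;   /* the wrapped driver */

   trace_dump_call_begin("pipe_context", "set_foo");
   trace_dump_arg(ptr, pipe);
   trace_dump_arg(uint, value);

   pipe->set_foo(pipe, value);                 /* forward the real call */

   trace_dump_call_end();
}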
diff --git a/src/gallium/drivers/trace/tr_context.h b/src/gallium/drivers/trace/tr_context.h
index 1e5ad88d034..ad57d9d5243 100644
--- a/src/gallium/drivers/trace/tr_context.h
+++ b/src/gallium/drivers/trace/tr_context.h
@@ -54,7 +54,7 @@ void
trace_context_check(const struct pipe_context *pipe);
-static INLINE struct trace_context *
+static inline struct trace_context *
trace_context(struct pipe_context *pipe)
{
assert(pipe);
diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c
index 753b92d8b54..601e2cbbec5 100644
--- a/src/gallium/drivers/trace/tr_dump.c
+++ b/src/gallium/drivers/trace/tr_dump.c
@@ -64,7 +64,7 @@ static long unsigned call_no = 0;
static boolean dumping = FALSE;
-static INLINE void
+static inline void
trace_dump_write(const char *buf, size_t size)
{
if (stream) {
@@ -73,14 +73,14 @@ trace_dump_write(const char *buf, size_t size)
}
-static INLINE void
+static inline void
trace_dump_writes(const char *s)
{
trace_dump_write(s, strlen(s));
}
-static INLINE void
+static inline void
trace_dump_writef(const char *format, ...)
{
static char buf[1024];
@@ -93,7 +93,7 @@ trace_dump_writef(const char *format, ...)
}
-static INLINE void
+static inline void
trace_dump_escape(const char *str)
{
const unsigned char *p = (const unsigned char *)str;
@@ -117,7 +117,7 @@ trace_dump_escape(const char *str)
}
-static INLINE void
+static inline void
trace_dump_indent(unsigned level)
{
unsigned i;
@@ -126,14 +126,14 @@ trace_dump_indent(unsigned level)
}
-static INLINE void
+static inline void
trace_dump_newline(void)
{
trace_dump_writes("\n");
}
-static INLINE void
+static inline void
trace_dump_tag(const char *name)
{
trace_dump_writes("<");
@@ -142,7 +142,7 @@ trace_dump_tag(const char *name)
}
-static INLINE void
+static inline void
trace_dump_tag_begin(const char *name)
{
trace_dump_writes("<");
@@ -150,7 +150,7 @@ trace_dump_tag_begin(const char *name)
trace_dump_writes(">");
}
-static INLINE void
+static inline void
trace_dump_tag_begin1(const char *name,
const char *attr1, const char *value1)
{
@@ -164,7 +164,7 @@ trace_dump_tag_begin1(const char *name,
}
-static INLINE void
+static inline void
trace_dump_tag_begin2(const char *name,
const char *attr1, const char *value1,
const char *attr2, const char *value2)
@@ -183,7 +183,7 @@ trace_dump_tag_begin2(const char *name,
}
-static INLINE void
+static inline void
trace_dump_tag_begin3(const char *name,
const char *attr1, const char *value1,
const char *attr2, const char *value2,
@@ -207,7 +207,7 @@ trace_dump_tag_begin3(const char *name,
}
-static INLINE void
+static inline void
trace_dump_tag_end(const char *name)
{
trace_dump_writes("</");
diff --git a/src/gallium/drivers/trace/tr_dump_defines.h b/src/gallium/drivers/trace/tr_dump_defines.h
index 0c83c2b68f1..b38d63eac59 100644
--- a/src/gallium/drivers/trace/tr_dump_defines.h
+++ b/src/gallium/drivers/trace/tr_dump_defines.h
@@ -34,7 +34,7 @@
#include "tr_dump.h"
-static INLINE void
+static inline void
trace_dump_format(enum pipe_format format)
{
if (!trace_dumping_enabled_locked())
@@ -44,7 +44,7 @@ trace_dump_format(enum pipe_format format)
}
-static INLINE void
+static inline void
trace_dump_query_type(unsigned value)
{
if (!trace_dumping_enabled_locked())
diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c
index 266626defa8..1d86a378eea 100644
--- a/src/gallium/drivers/trace/tr_screen.c
+++ b/src/gallium/drivers/trace/tr_screen.c
@@ -370,29 +370,6 @@ trace_screen_fence_reference(struct pipe_screen *_screen,
static boolean
-trace_screen_fence_signalled(struct pipe_screen *_screen,
- struct pipe_fence_handle *fence)
-{
- struct trace_screen *tr_scr = trace_screen(_screen);
- struct pipe_screen *screen = tr_scr->screen;
- int result;
-
- trace_dump_call_begin("pipe_screen", "fence_signalled");
-
- trace_dump_arg(ptr, screen);
- trace_dump_arg(ptr, fence);
-
- result = screen->fence_signalled(screen, fence);
-
- trace_dump_ret(bool, result);
-
- trace_dump_call_end();
-
- return result;
-}
-
-
-static boolean
trace_screen_fence_finish(struct pipe_screen *_screen,
struct pipe_fence_handle *fence,
uint64_t timeout)
@@ -503,7 +480,6 @@ trace_screen_create(struct pipe_screen *screen)
tr_scr->base.resource_get_handle = trace_screen_resource_get_handle;
tr_scr->base.resource_destroy = trace_screen_resource_destroy;
tr_scr->base.fence_reference = trace_screen_fence_reference;
- tr_scr->base.fence_signalled = trace_screen_fence_signalled;
tr_scr->base.fence_finish = trace_screen_fence_finish;
tr_scr->base.flush_frontbuffer = trace_screen_flush_frontbuffer;
tr_scr->base.get_timestamp = trace_screen_get_timestamp;
diff --git a/src/gallium/drivers/trace/tr_texture.h b/src/gallium/drivers/trace/tr_texture.h
index 5e45c3c2f8f..e48b7b39e24 100644
--- a/src/gallium/drivers/trace/tr_texture.h
+++ b/src/gallium/drivers/trace/tr_texture.h
@@ -85,7 +85,7 @@ struct trace_transfer
};
-static INLINE struct trace_resource *
+static inline struct trace_resource *
trace_resource(struct pipe_resource *texture)
{
if(!texture)
@@ -95,7 +95,7 @@ trace_resource(struct pipe_resource *texture)
}
-static INLINE struct trace_surface *
+static inline struct trace_surface *
trace_surface(struct pipe_surface *surface)
{
if(!surface)
@@ -105,7 +105,7 @@ trace_surface(struct pipe_surface *surface)
}
-static INLINE struct trace_sampler_view *
+static inline struct trace_sampler_view *
trace_sampler_view(struct pipe_sampler_view *sampler_view)
{
if (!sampler_view)
@@ -114,7 +114,7 @@ trace_sampler_view(struct pipe_sampler_view *sampler_view)
}
-static INLINE struct trace_transfer *
+static inline struct trace_transfer *
trace_transfer(struct pipe_transfer *transfer)
{
if(!transfer)
diff --git a/src/gallium/drivers/vc4/Makefile.am b/src/gallium/drivers/vc4/Makefile.am
index 3f62ce21a9f..f4a57ba3404 100644
--- a/src/gallium/drivers/vc4/Makefile.am
+++ b/src/gallium/drivers/vc4/Makefile.am
@@ -19,8 +19,6 @@
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
-AUTOMAKE_OPTIONS = subdir-objects
-
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
@@ -30,10 +28,10 @@ SIM_LDFLAGS = -lsimpenrose
endif
AM_CFLAGS = \
+ -I$(top_builddir)/src/glsl/nir \
$(LIBDRM_CFLAGS) \
$(GALLIUM_DRIVER_CFLAGS) \
$(SIM_CFLAGS) \
- -I$(top_srcdir)/src/mesa/ \
$()
noinst_LTLIBRARIES = libvc4.la
diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources
index 1eb029e67e7..6fb40c20562 100644
--- a/src/gallium/drivers/vc4/Makefile.sources
+++ b/src/gallium/drivers/vc4/Makefile.sources
@@ -19,6 +19,8 @@ C_SOURCES := \
vc4_fence.c \
vc4_formats.c \
vc4_job.c \
+ vc4_nir_lower_blend.c \
+ vc4_nir_lower_io.c \
vc4_opt_algebraic.c \
vc4_opt_constant_folding.c \
vc4_opt_copy_propagation.c \
@@ -49,4 +51,5 @@ C_SOURCES := \
vc4_state.c \
vc4_tiling.c \
vc4_tiling.h \
+ vc4_uniforms.c \
$()
diff --git a/src/gallium/drivers/vc4/kernel/vc4_drv.h b/src/gallium/drivers/vc4/kernel/vc4_drv.h
index 1fd8aa9fb28..ffc973735ae 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_drv.h
+++ b/src/gallium/drivers/vc4/kernel/vc4_drv.h
@@ -26,17 +26,6 @@
#include "vc4_simulator_validate.h"
-enum vc4_bo_mode {
- VC4_MODE_UNDECIDED,
- VC4_MODE_RENDER,
- VC4_MODE_SHADER,
-};
-
-struct vc4_bo_exec_state {
- struct drm_gem_cma_object *bo;
- enum vc4_bo_mode mode;
-};
-
struct vc4_exec_info {
/* Sequence number for this bin/render job. */
uint64_t seqno;
@@ -47,7 +36,7 @@ struct vc4_exec_info {
/* This is the array of BOs that were looked up at the start of exec.
* Command validation will use indices into this array.
*/
- struct vc4_bo_exec_state *bo;
+ struct drm_gem_cma_object **bo;
uint32_t bo_count;
/* List of other BOs used in the job that need to be released
@@ -72,7 +61,6 @@ struct vc4_exec_info {
* command lists.
*/
struct vc4_shader_state {
- uint8_t packet;
uint32_t addr;
/* Maximum vertex index referenced by any primitive using this
* shader state.
@@ -88,6 +76,7 @@ struct vc4_exec_info {
bool found_tile_binning_mode_config_packet;
bool found_start_tile_binning_packet;
bool found_increment_semaphore_packet;
+ bool found_flush;
uint8_t bin_tiles_x, bin_tiles_y;
struct drm_gem_cma_object *tile_bo;
uint32_t tile_alloc_offset;
@@ -99,6 +88,9 @@ struct vc4_exec_info {
uint32_t ct0ca, ct0ea;
uint32_t ct1ca, ct1ea;
+ /* Pointer to the unvalidated bin CL (if present). */
+ void *bin_u;
+
/* Pointers to the shader recs. These paddr gets incremented as CL
* packets are relocated in validate_gl_shader_state, and the vaddrs
* (u and v) get incremented and size decremented as the shader recs
@@ -168,10 +160,8 @@ vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec);
struct vc4_validated_shader_info *
vc4_validate_shader(struct drm_gem_cma_object *shader_obj);
-bool vc4_use_bo(struct vc4_exec_info *exec,
- uint32_t hindex,
- enum vc4_bo_mode mode,
- struct drm_gem_cma_object **obj);
+struct drm_gem_cma_object *vc4_use_bo(struct vc4_exec_info *exec,
+ uint32_t hindex);
int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec);
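Call sites of vc4_use_bo() move from a bool-plus-out-parameter convention to NULL-on-failure, and the per-BO mode tracking disappears along with enum vc4_bo_mode. Sketch of the change at a hypothetical call site:

/* before: */
struct drm_gem_cma_object *obj;
if (!vc4_use_bo(exec, hindex, VC4_MODE_RENDER, &obj))
        return false;

/* after: */
struct drm_gem_cma_object *obj = vc4_use_bo(exec, hindex);
if (!obj)
        return false;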
diff --git a/src/gallium/drivers/vc4/kernel/vc4_gem.c b/src/gallium/drivers/vc4/kernel/vc4_gem.c
index e4b7fea5968..93f9ec7ed9b 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_gem.c
+++ b/src/gallium/drivers/vc4/kernel/vc4_gem.c
@@ -112,6 +112,8 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
exec->ct0ca = exec->exec_bo->paddr + bin_offset;
+ exec->bin_u = bin;
+
exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
exec->shader_rec_size = args->shader_rec_size;
diff --git a/src/gallium/drivers/vc4/kernel/vc4_packet.h b/src/gallium/drivers/vc4/kernel/vc4_packet.h
index 88cfc0fa9f0..771e2b78761 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_packet.h
+++ b/src/gallium/drivers/vc4/kernel/vc4_packet.h
@@ -88,16 +88,22 @@ enum vc4_packet {
#define VC4_PACKET_START_TILE_BINNING_SIZE 1
#define VC4_PACKET_INCREMENT_SEMAPHORE_SIZE 1
#define VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE 1
+#define VC4_PACKET_BRANCH_SIZE 5
#define VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE 5
#define VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE 1
#define VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF_SIZE 1
+#define VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE 5
+#define VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE 5
#define VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE 7
#define VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE 7
#define VC4_PACKET_GL_INDEXED_PRIMITIVE_SIZE 14
#define VC4_PACKET_GL_ARRAY_PRIMITIVE_SIZE 10
+#define VC4_PACKET_COMPRESSED_PRIMITIVE_SIZE 1
+#define VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE_SIZE 1
#define VC4_PACKET_PRIMITIVE_LIST_FORMAT_SIZE 2
#define VC4_PACKET_GL_SHADER_STATE_SIZE 5
#define VC4_PACKET_NV_SHADER_STATE_SIZE 5
+#define VC4_PACKET_VG_SHADER_STATE_SIZE 5
#define VC4_PACKET_CONFIGURATION_BITS_SIZE 4
#define VC4_PACKET_FLAT_SHADE_FLAGS_SIZE 5
#define VC4_PACKET_POINT_SIZE_SIZE 5
@@ -106,6 +112,7 @@ enum vc4_packet {
#define VC4_PACKET_DEPTH_OFFSET_SIZE 5
#define VC4_PACKET_CLIP_WINDOW_SIZE 9
#define VC4_PACKET_VIEWPORT_OFFSET_SIZE 5
+#define VC4_PACKET_Z_CLIPPING_SIZE 9
#define VC4_PACKET_CLIPPER_XY_SCALING_SIZE 9
#define VC4_PACKET_CLIPPER_Z_SCALING_SIZE 9
#define VC4_PACKET_TILE_BINNING_MODE_CONFIG_SIZE 16
@@ -136,6 +143,16 @@ enum vc4_packet {
/** @{
*
+ * low bits of VC4_PACKET_STORE_FULL_RES_TILE_BUFFER and
+ * VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER.
+ */
+#define VC4_LOADSTORE_FULL_RES_EOF (1 << 3)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL (1 << 2)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_ZS (1 << 1)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_COLOR (1 << 0)
+
+/** @{
+ *
* byte 2 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
* VC4_PACKET_LOAD_TILE_BUFFER_GENERAL (low bits of the address)
*/
diff --git a/src/gallium/drivers/vc4/kernel/vc4_render_cl.c b/src/gallium/drivers/vc4/kernel/vc4_render_cl.c
index e2d907ad91f..b827eb7e9e1 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_render_cl.c
+++ b/src/gallium/drivers/vc4/kernel/vc4_render_cl.c
@@ -100,7 +100,8 @@ static void emit_tile(struct vc4_exec_info *exec,
struct vc4_rcl_setup *setup,
uint8_t x, uint8_t y, bool first, bool last)
{
- bool has_bin = exec->args->bin_cl_size != 0;
+ struct drm_vc4_submit_cl *args = exec->args;
+ bool has_bin = args->bin_cl_size != 0;
/* Note that the load doesn't actually occur until the
* tile coords packet is processed, and only one load
@@ -108,10 +109,9 @@ static void emit_tile(struct vc4_exec_info *exec,
*/
if (setup->color_read) {
rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
- rcl_u16(setup, exec->args->color_read.bits);
+ rcl_u16(setup, args->color_read.bits);
rcl_u32(setup,
- setup->color_read->paddr +
- exec->args->color_read.offset);
+ setup->color_read->paddr + args->color_read.offset);
}
if (setup->zs_read) {
@@ -122,9 +122,8 @@ static void emit_tile(struct vc4_exec_info *exec,
}
rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
- rcl_u16(setup, exec->args->zs_read.bits);
- rcl_u32(setup,
- setup->zs_read->paddr + exec->args->zs_read.offset);
+ rcl_u16(setup, args->zs_read.bits);
+ rcl_u32(setup, setup->zs_read->paddr + args->zs_read.offset);
}
/* Clipping depends on tile coordinates having been
@@ -147,11 +146,11 @@ static void emit_tile(struct vc4_exec_info *exec,
if (setup->zs_write) {
rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
- rcl_u16(setup, exec->args->zs_write.bits |
+ rcl_u16(setup, args->zs_write.bits |
(setup->color_ms_write ?
VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR : 0));
rcl_u32(setup,
- (setup->zs_write->paddr + exec->args->zs_write.offset) |
+ (setup->zs_write->paddr + args->zs_write.offset) |
((last && !setup->color_ms_write) ?
VC4_LOADSTORE_TILE_BUFFER_EOF : 0));
}
@@ -172,11 +171,12 @@ static void emit_tile(struct vc4_exec_info *exec,
static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
struct vc4_rcl_setup *setup)
{
- bool has_bin = exec->args->bin_cl_size != 0;
- uint8_t min_x_tile = exec->args->min_x_tile;
- uint8_t min_y_tile = exec->args->min_y_tile;
- uint8_t max_x_tile = exec->args->max_x_tile;
- uint8_t max_y_tile = exec->args->max_y_tile;
+ struct drm_vc4_submit_cl *args = exec->args;
+ bool has_bin = args->bin_cl_size != 0;
+ uint8_t min_x_tile = args->min_x_tile;
+ uint8_t min_y_tile = args->min_y_tile;
+ uint8_t max_x_tile = args->max_x_tile;
+ uint8_t max_y_tile = args->max_y_tile;
uint8_t xtiles = max_x_tile - min_x_tile + 1;
uint8_t ytiles = max_y_tile - min_y_tile + 1;
uint8_t x, y;
@@ -185,7 +185,7 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
size = VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE;
loop_body_size = VC4_PACKET_TILE_COORDINATES_SIZE;
- if (exec->args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
+ if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
size += VC4_PACKET_CLEAR_COLORS_SIZE +
VC4_PACKET_TILE_COORDINATES_SIZE +
VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
@@ -208,7 +208,7 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
}
if (setup->zs_write)
- loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE;
+ loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
if (setup->color_ms_write) {
if (setup->zs_write)
loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE;
@@ -226,23 +226,23 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
rcl_u32(setup,
(setup->color_ms_write ?
(setup->color_ms_write->paddr +
- exec->args->color_ms_write.offset) :
+ args->color_ms_write.offset) :
0));
- rcl_u16(setup, exec->args->width);
- rcl_u16(setup, exec->args->height);
- rcl_u16(setup, exec->args->color_ms_write.bits);
+ rcl_u16(setup, args->width);
+ rcl_u16(setup, args->height);
+ rcl_u16(setup, args->color_ms_write.bits);
/* The tile buffer gets cleared when the previous tile is stored. If
* the clear values changed between frames, then the tile buffer has
* stale clear values in it, so we have to do a store in None mode (no
* writes) so that we trigger the tile buffer clear.
*/
- if (exec->args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
+ if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
rcl_u8(setup, VC4_PACKET_CLEAR_COLORS);
- rcl_u32(setup, exec->args->clear_color[0]);
- rcl_u32(setup, exec->args->clear_color[1]);
- rcl_u32(setup, exec->args->clear_z);
- rcl_u8(setup, exec->args->clear_s);
+ rcl_u32(setup, args->clear_color[0]);
+ rcl_u32(setup, args->clear_color[1]);
+ rcl_u32(setup, args->clear_z);
+ rcl_u8(setup, args->clear_s);
vc4_tile_coordinates(setup, 0, 0);
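
The dummy store that follows this tile-coordinates packet is emitted in None mode, i.e. with no buffer selected, so nothing is written but the queued clear still takes effect. A sketch of what presumably follows past the hunk boundary (VC4_LOADSTORE_TILE_BUFFER_NONE is the assumed no-buffer select value from vc4_packet.h):

rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
rcl_u16(setup, VC4_LOADSTORE_TILE_BUFFER_NONE); /* assumed: no buffer selected */
rcl_u32(setup, 0); /* None mode needs no address */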
@@ -286,7 +286,8 @@ static int vc4_rcl_surface_setup(struct vc4_exec_info *exec,
if (surf->hindex == ~0)
return 0;
- if (!vc4_use_bo(exec, surf->hindex, VC4_MODE_RENDER, obj))
+ *obj = vc4_use_bo(exec, surf->hindex);
+ if (!*obj)
return -EINVAL;
if (surf->bits & ~(VC4_LOADSTORE_TILE_BUFFER_TILING_MASK |
@@ -365,7 +366,8 @@ vc4_rcl_ms_surface_setup(struct vc4_exec_info *exec,
if (surf->hindex == ~0)
return 0;
- if (!vc4_use_bo(exec, surf->hindex, VC4_MODE_RENDER, obj))
+ *obj = vc4_use_bo(exec, surf->hindex);
+ if (!*obj)
return -EINVAL;
if (tiling > VC4_TILING_FORMAT_LT) {
diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate.c b/src/gallium/drivers/vc4/kernel/vc4_validate.c
index a0b67a7e50b..b248831113c 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_validate.c
+++ b/src/gallium/drivers/vc4/kernel/vc4_validate.c
@@ -94,42 +94,42 @@ size_is_lt(uint32_t width, uint32_t height, int cpp)
height <= 4 * utile_height(cpp));
}
-bool
-vc4_use_bo(struct vc4_exec_info *exec,
- uint32_t hindex,
- enum vc4_bo_mode mode,
- struct drm_gem_cma_object **obj)
+struct drm_gem_cma_object *
+vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex)
{
- *obj = NULL;
+ struct drm_gem_cma_object *obj;
+ struct drm_vc4_bo *bo;
if (hindex >= exec->bo_count) {
DRM_ERROR("BO index %d greater than BO count %d\n",
hindex, exec->bo_count);
- return false;
+ return NULL;
}
+ obj = exec->bo[hindex];
+ bo = to_vc4_bo(&obj->base);
- if (exec->bo[hindex].mode != mode) {
- if (exec->bo[hindex].mode == VC4_MODE_UNDECIDED) {
- exec->bo[hindex].mode = mode;
- } else {
- DRM_ERROR("BO index %d reused with mode %d vs %d\n",
- hindex, exec->bo[hindex].mode, mode);
- return false;
- }
+ if (bo->validated_shader) {
+ DRM_ERROR("Trying to use shader BO as something other than "
+ "a shader\n");
+ return NULL;
}
- *obj = exec->bo[hindex].bo;
- return true;
+ return obj;
+}
+
+static struct drm_gem_cma_object *
+vc4_use_handle(struct vc4_exec_info *exec, uint32_t gem_handles_packet_index)
+{
+ return vc4_use_bo(exec, exec->bo_index[gem_handles_packet_index]);
}
static bool
-vc4_use_handle(struct vc4_exec_info *exec,
- uint32_t gem_handles_packet_index,
- enum vc4_bo_mode mode,
- struct drm_gem_cma_object **obj)
+validate_bin_pos(struct vc4_exec_info *exec, void *untrusted, uint32_t pos)
{
- return vc4_use_bo(exec, exec->bo_index[gem_handles_packet_index],
- mode, obj);
+ /* Note that the untrusted pointer passed to these functions is
+ * incremented past the packet byte.
+ */
+ return (untrusted - 1 == exec->bin_u + pos);
}
static uint32_t
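
exec->bin_u was saved in vc4_get_bcl() above precisely so validate_bin_pos() can pin a packet to an absolute byte offset in the user's bin CL. A self-contained sketch of the arithmetic, for a hypothetical 6-byte bin CL whose last two bytes are the required semaphore/flush pair:

#include <assert.h>
#include <stdint.h>

int main(void)
{
        uint8_t bin_u[6];                  /* hypothetical 6-byte bin CL */
        uint32_t bin_cl_size = sizeof(bin_u);

        /* The handler for the 1-byte packet at byte 4 receives a pointer
         * just past the packet byte:
         */
        uint8_t *untrusted = bin_u + 5;

        /* validate_bin_pos(exec, untrusted, bin_cl_size - 2) reduces to: */
        assert(untrusted - 1 == bin_u + (bin_cl_size - 2));
        return 0;
}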
@@ -201,14 +201,15 @@ vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo,
return true;
}
+
static int
-validate_flush_all(VALIDATE_ARGS)
+validate_flush(VALIDATE_ARGS)
{
- if (exec->found_increment_semaphore_packet) {
- DRM_ERROR("VC4_PACKET_FLUSH_ALL after "
- "VC4_PACKET_INCREMENT_SEMAPHORE\n");
+ if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 1)) {
+ DRM_ERROR("Bin CL must end with VC4_PACKET_FLUSH\n");
return -EINVAL;
}
+ exec->found_flush = true;
return 0;
}
@@ -233,17 +234,13 @@ validate_start_tile_binning(VALIDATE_ARGS)
static int
validate_increment_semaphore(VALIDATE_ARGS)
{
- if (exec->found_increment_semaphore_packet) {
- DRM_ERROR("Duplicate VC4_PACKET_INCREMENT_SEMAPHORE\n");
+ if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 2)) {
+ DRM_ERROR("Bin CL must end with "
+ "VC4_PACKET_INCREMENT_SEMAPHORE\n");
return -EINVAL;
}
exec->found_increment_semaphore_packet = true;
- /* Once we've found the semaphore increment, there should be one FLUSH
- * then the end of the command list. The FLUSH actually triggers the
- * increment, so we only need to make sure there
- */
-
return 0;
}
@@ -257,11 +254,6 @@ validate_indexed_prim_list(VALIDATE_ARGS)
uint32_t index_size = (*(uint8_t *)(untrusted + 0) >> 4) ? 2 : 1;
struct vc4_shader_state *shader_state;
- if (exec->found_increment_semaphore_packet) {
- DRM_ERROR("Drawing after VC4_PACKET_INCREMENT_SEMAPHORE\n");
- return -EINVAL;
- }
-
/* Check overflow condition */
if (exec->shader_state_count == 0) {
DRM_ERROR("shader state must precede primitives\n");
@@ -272,7 +264,8 @@ validate_indexed_prim_list(VALIDATE_ARGS)
if (max_index > shader_state->max_index)
shader_state->max_index = max_index;
- if (!vc4_use_handle(exec, 0, VC4_MODE_RENDER, &ib))
+ ib = vc4_use_handle(exec, 0);
+ if (!ib)
return -EINVAL;
if (offset > ib->base.size ||
@@ -295,11 +288,6 @@ validate_gl_array_primitive(VALIDATE_ARGS)
uint32_t max_index;
struct vc4_shader_state *shader_state;
- if (exec->found_increment_semaphore_packet) {
- DRM_ERROR("Drawing after VC4_PACKET_INCREMENT_SEMAPHORE\n");
- return -EINVAL;
- }
-
/* Check overflow condition */
if (exec->shader_state_count == 0) {
DRM_ERROR("shader state must precede primitives\n");
@@ -329,7 +317,6 @@ validate_gl_shader_state(VALIDATE_ARGS)
return -EINVAL;
}
- exec->shader_state[i].packet = VC4_PACKET_GL_SHADER_STATE;
exec->shader_state[i].addr = *(uint32_t *)untrusted;
exec->shader_state[i].max_index = 0;
@@ -348,31 +335,6 @@ validate_gl_shader_state(VALIDATE_ARGS)
}
static int
-validate_nv_shader_state(VALIDATE_ARGS)
-{
- uint32_t i = exec->shader_state_count++;
-
- if (i >= exec->shader_state_size) {
- DRM_ERROR("More requests for shader states than declared\n");
- return -EINVAL;
- }
-
- exec->shader_state[i].packet = VC4_PACKET_NV_SHADER_STATE;
- exec->shader_state[i].addr = *(uint32_t *)untrusted;
-
- if (exec->shader_state[i].addr & 15) {
- DRM_ERROR("NV shader state address 0x%08x misaligned\n",
- exec->shader_state[i].addr);
- return -EINVAL;
- }
-
- *(uint32_t *)validated = (exec->shader_state[i].addr +
- exec->shader_rec_p);
-
- return 0;
-}
-
-static int
validate_tile_binning_config(VALIDATE_ARGS)
{
struct drm_device *dev = exec->exec_bo->base.dev;
@@ -473,8 +435,8 @@ static const struct cmd_info {
} cmd_info[] = {
VC4_DEFINE_PACKET(VC4_PACKET_HALT, "halt", NULL),
VC4_DEFINE_PACKET(VC4_PACKET_NOP, "nop", NULL),
- VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, "flush", NULL),
- VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, "flush all state", validate_flush_all),
+ VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, "flush", validate_flush),
+ VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, "flush all state", NULL),
VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING, "start tile binning", validate_start_tile_binning),
VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE, "increment semaphore", validate_increment_semaphore),
@@ -488,7 +450,7 @@ static const struct cmd_info {
VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, "primitive list format", NULL),
VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, "GL Shader State", validate_gl_shader_state),
- VC4_DEFINE_PACKET(VC4_PACKET_NV_SHADER_STATE, "NV Shader State", validate_nv_shader_state),
+ /* We don't support validating NV shader states. */
VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, "configuration bits", NULL),
VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, "flat shade flags", NULL),
@@ -525,7 +487,7 @@ vc4_validate_bin_cl(struct drm_device *dev,
u8 cmd = *(uint8_t *)src_pkt;
const struct cmd_info *info;
- if (cmd > ARRAY_SIZE(cmd_info)) {
+ if (cmd >= ARRAY_SIZE(cmd_info)) {
DRM_ERROR("0x%08x: packet %d out of bounds\n",
src_offset, cmd);
return -EINVAL;
@@ -580,8 +542,16 @@ vc4_validate_bin_cl(struct drm_device *dev,
return -EINVAL;
}
- if (!exec->found_increment_semaphore_packet) {
- DRM_ERROR("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE\n");
+ /* The bin CL must be ended with INCREMENT_SEMAPHORE and FLUSH. The
+ * semaphore is used to trigger the render CL to start up, and the
+ * FLUSH is what caps the bin lists with
+ * VC4_PACKET_RETURN_FROM_SUB_LIST (so they jump back to the main
+ * render CL when they are branched to) and actually triggers the queued
+ * semaphore increment.
+ */
+ if (!exec->found_increment_semaphore_packet || !exec->found_flush) {
+ DRM_ERROR("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE + "
+ "VC4_PACKET_FLUSH\n");
return -EINVAL;
}
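
Taken together with validate_flush() and validate_increment_semaphore() above, the only tail the validator accepts is INCREMENT_SEMAPHORE at bin_cl_size - 2 followed by FLUSH at bin_cl_size - 1 (both packets are one byte, per the _SIZE defines in vc4_packet.h). That is exactly what userspace emits in vc4_flush(), as in the vc4_context.c hunk below:

cl_ensure_space(&vc4->bcl, 8);
struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
cl_u8(&bcl, VC4_PACKET_INCREMENT_SEMAPHORE);
cl_u8(&bcl, VC4_PACKET_FLUSH);  /* caps the bin lists and fires the semaphore */
cl_end(&vc4->bcl, bcl);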
@@ -612,18 +582,19 @@ reloc_tex(struct vc4_exec_info *exec,
uint32_t cube_map_stride = 0;
enum vc4_texture_data_type type;
- if (!vc4_use_bo(exec, texture_handle_index, VC4_MODE_RENDER, &tex))
+ tex = vc4_use_bo(exec, texture_handle_index);
+ if (!tex)
return false;
if (sample->is_direct) {
uint32_t remaining_size = tex->base.size - p0;
if (p0 > tex->base.size - 4) {
DRM_ERROR("UBO offset greater than UBO size\n");
- return false;
+ goto fail;
}
if (p1 > remaining_size - 4) {
DRM_ERROR("UBO clamp would allow reads outside of UBO\n");
- return false;
+ goto fail;
}
*validated_p0 = tex->paddr + p0;
return true;
@@ -642,14 +613,14 @@ reloc_tex(struct vc4_exec_info *exec,
VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) {
if (cube_map_stride) {
DRM_ERROR("Cube map stride set twice\n");
- return false;
+ goto fail;
}
cube_map_stride = p3 & VC4_TEX_P2_CMST_MASK;
}
if (!cube_map_stride) {
DRM_ERROR("Cube map stride not set\n");
- return false;
+ goto fail;
}
}
@@ -683,7 +654,7 @@ reloc_tex(struct vc4_exec_info *exec,
case VC4_TEXTURE_TYPE_YUV422R:
default:
DRM_ERROR("Texture format %d unsupported\n", type);
- return false;
+ goto fail;
}
utile_w = utile_width(cpp);
utile_h = utile_height(cpp);
@@ -699,7 +670,7 @@ reloc_tex(struct vc4_exec_info *exec,
if (!vc4_check_tex_size(exec, tex, offset + cube_map_stride * 5,
tiling_format, width, height, cpp)) {
- return false;
+ goto fail;
}
/* The mipmap levels are stored before the base of the texture. Make
@@ -740,7 +711,7 @@ reloc_tex(struct vc4_exec_info *exec,
i, level_width, level_height,
aligned_width, aligned_height,
level_size, offset);
- return false;
+ goto fail;
}
offset -= level_size;
@@ -749,54 +720,37 @@ reloc_tex(struct vc4_exec_info *exec,
*validated_p0 = tex->paddr + p0;
return true;
+ fail:
+ DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0);
+ DRM_INFO("Texture p1 at %d: 0x%08x\n", sample->p_offset[1], p1);
+ DRM_INFO("Texture p2 at %d: 0x%08x\n", sample->p_offset[2], p2);
+ DRM_INFO("Texture p3 at %d: 0x%08x\n", sample->p_offset[3], p3);
+ return false;
}
static int
-validate_shader_rec(struct drm_device *dev,
- struct vc4_exec_info *exec,
- struct vc4_shader_state *state)
+validate_gl_shader_rec(struct drm_device *dev,
+ struct vc4_exec_info *exec,
+ struct vc4_shader_state *state)
{
uint32_t *src_handles;
void *pkt_u, *pkt_v;
- enum shader_rec_reloc_type {
- RELOC_CODE,
- RELOC_VBO,
- };
- struct shader_rec_reloc {
- enum shader_rec_reloc_type type;
- uint32_t offset;
- };
- static const struct shader_rec_reloc gl_relocs[] = {
- { RELOC_CODE, 4 }, /* fs */
- { RELOC_CODE, 16 }, /* vs */
- { RELOC_CODE, 28 }, /* cs */
+ static const uint32_t shader_reloc_offsets[] = {
+ 4, /* fs */
+ 16, /* vs */
+ 28, /* cs */
};
- static const struct shader_rec_reloc nv_relocs[] = {
- { RELOC_CODE, 4 }, /* fs */
- { RELOC_VBO, 12 }
- };
- const struct shader_rec_reloc *relocs;
- struct drm_gem_cma_object *bo[ARRAY_SIZE(gl_relocs) + 8];
- uint32_t nr_attributes = 0, nr_fixed_relocs, nr_relocs, packet_size;
+ uint32_t shader_reloc_count = ARRAY_SIZE(shader_reloc_offsets);
+ struct drm_gem_cma_object *bo[shader_reloc_count + 8];
+ uint32_t nr_attributes, nr_relocs, packet_size;
int i;
- struct vc4_validated_shader_info *validated_shader = NULL;
-
- if (state->packet == VC4_PACKET_NV_SHADER_STATE) {
- relocs = nv_relocs;
- nr_fixed_relocs = ARRAY_SIZE(nv_relocs);
- packet_size = 16;
- } else {
- relocs = gl_relocs;
- nr_fixed_relocs = ARRAY_SIZE(gl_relocs);
-
- nr_attributes = state->addr & 0x7;
- if (nr_attributes == 0)
- nr_attributes = 8;
- packet_size = gl_shader_rec_size(state->addr);
- }
- nr_relocs = nr_fixed_relocs + nr_attributes;
+ nr_attributes = state->addr & 0x7;
+ if (nr_attributes == 0)
+ nr_attributes = 8;
+ packet_size = gl_shader_rec_size(state->addr);
+ nr_relocs = ARRAY_SIZE(shader_reloc_offsets) + nr_attributes;
if (nr_relocs * 4 > exec->shader_rec_size) {
DRM_ERROR("overflowed shader recs reading %d handles "
"from %d bytes left\n",
@@ -826,21 +780,30 @@ validate_shader_rec(struct drm_device *dev,
exec->shader_rec_v += roundup(packet_size, 16);
exec->shader_rec_size -= packet_size;
- for (i = 0; i < nr_relocs; i++) {
- enum vc4_bo_mode mode;
-
- if (i < nr_fixed_relocs && relocs[i].type == RELOC_CODE)
- mode = VC4_MODE_SHADER;
- else
- mode = VC4_MODE_RENDER;
+ if (!(*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD)) {
+ DRM_ERROR("Multi-threaded fragment shaders not supported.\n");
+ return -EINVAL;
+ }
- if (!vc4_use_bo(exec, src_handles[i], mode, &bo[i])) {
- return false;
+ for (i = 0; i < shader_reloc_count; i++) {
+ if (src_handles[i] >= exec->bo_count) {
+ DRM_ERROR("Shader handle %d too big\n", src_handles[i]);
+ return -EINVAL;
}
+
+ bo[i] = exec->bo[src_handles[i]];
+ if (!bo[i])
+ return -EINVAL;
+ }
+ for (i = shader_reloc_count; i < nr_relocs; i++) {
+ bo[i] = vc4_use_bo(exec, src_handles[i]);
+ if (!bo[i])
+ return -EINVAL;
}
- for (i = 0; i < nr_fixed_relocs; i++) {
- uint32_t o = relocs[i].offset;
+ for (i = 0; i < shader_reloc_count; i++) {
+ struct vc4_validated_shader_info *validated_shader;
+ uint32_t o = shader_reloc_offsets[i];
uint32_t src_offset = *(uint32_t *)(pkt_u + o);
uint32_t *texture_handles_u;
void *uniform_data_u;
@@ -848,58 +811,50 @@ validate_shader_rec(struct drm_device *dev,
*(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset;
- switch (relocs[i].type) {
- case RELOC_CODE:
- if (src_offset != 0) {
- DRM_ERROR("Shaders must be at offset 0 of "
- "the BO.\n");
- goto fail;
- }
+ if (src_offset != 0) {
+ DRM_ERROR("Shaders must be at offset 0 of "
+ "the BO.\n");
+ return -EINVAL;
+ }
- kfree(validated_shader);
- validated_shader = vc4_validate_shader(bo[i]);
- if (!validated_shader)
- goto fail;
+ validated_shader = to_vc4_bo(&bo[i]->base)->validated_shader;
+ if (!validated_shader)
+ return -EINVAL;
- if (validated_shader->uniforms_src_size >
- exec->uniforms_size) {
- DRM_ERROR("Uniforms src buffer overflow\n");
- goto fail;
- }
+ if (validated_shader->uniforms_src_size >
+ exec->uniforms_size) {
+ DRM_ERROR("Uniforms src buffer overflow\n");
+ return -EINVAL;
+ }
- texture_handles_u = exec->uniforms_u;
- uniform_data_u = (texture_handles_u +
- validated_shader->num_texture_samples);
-
- memcpy(exec->uniforms_v, uniform_data_u,
- validated_shader->uniforms_size);
-
- for (tex = 0;
- tex < validated_shader->num_texture_samples;
- tex++) {
- if (!reloc_tex(exec,
- uniform_data_u,
- &validated_shader->texture_samples[tex],
- texture_handles_u[tex])) {
- goto fail;
- }
- }
+ texture_handles_u = exec->uniforms_u;
+ uniform_data_u = (texture_handles_u +
+ validated_shader->num_texture_samples);
- *(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;
+ memcpy(exec->uniforms_v, uniform_data_u,
+ validated_shader->uniforms_size);
- exec->uniforms_u += validated_shader->uniforms_src_size;
- exec->uniforms_v += validated_shader->uniforms_size;
- exec->uniforms_p += validated_shader->uniforms_size;
+ for (tex = 0;
+ tex < validated_shader->num_texture_samples;
+ tex++) {
+ if (!reloc_tex(exec,
+ uniform_data_u,
+ &validated_shader->texture_samples[tex],
+ texture_handles_u[tex])) {
+ return -EINVAL;
+ }
+ }
- break;
+ *(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;
- case RELOC_VBO:
- break;
- }
+ exec->uniforms_u += validated_shader->uniforms_src_size;
+ exec->uniforms_v += validated_shader->uniforms_size;
+ exec->uniforms_p += validated_shader->uniforms_size;
}
for (i = 0; i < nr_attributes; i++) {
- struct drm_gem_cma_object *vbo = bo[nr_fixed_relocs + i];
+ struct drm_gem_cma_object *vbo =
+ bo[ARRAY_SIZE(shader_reloc_offsets) + i];
uint32_t o = 36 + i * 8;
uint32_t offset = *(uint32_t *)(pkt_u + o + 0);
uint32_t attr_size = *(uint8_t *)(pkt_u + o + 4) + 1;
@@ -929,13 +884,7 @@ validate_shader_rec(struct drm_device *dev,
*(uint32_t *)(pkt_v + o) = vbo->paddr + offset;
}
- kfree(validated_shader);
-
return 0;
-
-fail:
- kfree(validated_shader);
- return -EINVAL;
}
int
@@ -946,7 +895,7 @@ vc4_validate_shader_recs(struct drm_device *dev,
int ret = 0;
for (i = 0; i < exec->shader_state_count; i++) {
- ret = validate_shader_rec(dev, exec, &exec->shader_state[i]);
+ ret = validate_gl_shader_rec(dev, exec, &exec->shader_state[i]);
if (ret)
return ret;
}
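
With the NV path removed, every shader record is a GL record: three fixed code relocations followed by per-attribute VBO relocations. The layout consumed above, reconstructed from the offsets in this patch (a sketch; offsets in bytes):

/* GL shader record layout, as validated above:
 *
 *    0: u16 flags            -- must include VC4_SHADER_FLAG_FS_SINGLE_THREAD
 *    2: u8  fs num uniforms (unused), u8 fs num inputs
 *    4: u32 fs code reloc    -- shader_reloc_offsets[0]
 *   16: u32 vs code reloc    -- shader_reloc_offsets[1]
 *   28: u32 cs code reloc    -- shader_reloc_offsets[2]
 *   36 + i*8: attribute i    -- u32 address reloc, u8 size-1, u8 stride,
 *                               u8 VS VPM offset, u8 CS VPM offset
 */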
diff --git a/src/gallium/drivers/vc4/vc4_blit.c b/src/gallium/drivers/vc4/vc4_blit.c
index d29e2c9c318..e52a1941730 100644
--- a/src/gallium/drivers/vc4/vc4_blit.c
+++ b/src/gallium/drivers/vc4/vc4_blit.c
@@ -94,7 +94,7 @@ vc4_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
struct vc4_context *vc4 = vc4_context(ctx);
if (!util_blitter_is_blit_supported(vc4->blitter, info)) {
- fprintf(stderr, "blit unsupported %s -> %s",
+ fprintf(stderr, "blit unsupported %s -> %s\n",
util_format_short_name(info->src.resource->format),
util_format_short_name(info->dst.resource->format));
return false;
@@ -135,7 +135,7 @@ vc4_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
info.dst.resource->nr_samples <= 1 &&
!util_format_is_depth_or_stencil(info.src.resource->format) &&
!util_format_is_pure_integer(info.src.resource->format)) {
- fprintf(stderr, "color resolve unimplemented");
+ fprintf(stderr, "color resolve unimplemented\n");
return;
}
@@ -147,7 +147,7 @@ vc4_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
}
if (info.mask & PIPE_MASK_S) {
- fprintf(stderr, "cannot blit stencil, skipping");
+ fprintf(stderr, "cannot blit stencil, skipping\n");
info.mask &= ~PIPE_MASK_S;
}
diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c b/src/gallium/drivers/vc4/vc4_bufmgr.c
index cbdb9e89cf6..f7b41f5816d 100644
--- a/src/gallium/drivers/vc4/vc4_bufmgr.c
+++ b/src/gallium/drivers/vc4/vc4_bufmgr.c
@@ -1,5 +1,5 @@
/*
- * Copyright © 2014 Broadcom
+ * Copyright © 2014-2015 Broadcom
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -94,7 +94,7 @@ vc4_bo_from_cache(struct vc4_screen *screen, uint32_t size, const char *name)
* allocate something new instead, since we assume that the
* user will proceed to CPU map it and fill it with stuff.
*/
- if (!vc4_bo_wait(bo, 0)) {
+ if (!vc4_bo_wait(bo, 0, NULL)) {
pipe_mutex_unlock(cache->lock);
return NULL;
}
@@ -381,15 +381,57 @@ vc4_bo_get_dmabuf(struct vc4_bo *bo)
}
struct vc4_bo *
-vc4_bo_alloc_mem(struct vc4_screen *screen, const void *data, uint32_t size,
- const char *name)
+vc4_bo_alloc_shader(struct vc4_screen *screen, const void *data, uint32_t size)
{
- void *map;
struct vc4_bo *bo;
+ int ret;
+
+ bo = CALLOC_STRUCT(vc4_bo);
+ if (!bo)
+ return NULL;
+
+ pipe_reference_init(&bo->reference, 1);
+ bo->screen = screen;
+ bo->size = align(size, 4096);
+ bo->name = "code";
+ bo->private = false; /* Make sure it doesn't go back to the cache. */
+
+ if (!using_vc4_simulator) {
+ struct drm_vc4_create_shader_bo create = {
+ .size = size,
+ .data = (uintptr_t)data,
+ };
+
+ ret = drmIoctl(screen->fd, DRM_IOCTL_VC4_CREATE_SHADER_BO,
+ &create);
+ bo->handle = create.handle;
+ } else {
+ struct drm_mode_create_dumb create;
+ memset(&create, 0, sizeof(create));
+
+ create.width = 128;
+ create.bpp = 8;
+ create.height = (size + 127) / 128;
+
+ ret = drmIoctl(screen->fd, DRM_IOCTL_MODE_CREATE_DUMB, &create);
+ bo->handle = create.handle;
+ assert(create.size >= size);
+
+ vc4_bo_map(bo);
+ memcpy(bo->map, data, size);
+ }
+ if (ret != 0) {
+ fprintf(stderr, "create shader ioctl failure\n");
+ abort();
+ }
+
+ screen->bo_count++;
+ screen->bo_size += bo->size;
+ if (dump_stats) {
+ fprintf(stderr, "Allocated shader %dkb:\n", size / 1024);
+ vc4_bo_dump_stats(screen);
+ }
- bo = vc4_bo_alloc(screen, size, name);
- map = vc4_bo_map(bo);
- memcpy(map, data, size);
return bo;
}
@@ -413,63 +455,91 @@ vc4_bo_flink(struct vc4_bo *bo, uint32_t *name)
return true;
}
+static int vc4_wait_seqno_ioctl(int fd, uint64_t seqno, uint64_t timeout_ns)
+{
+ if (using_vc4_simulator)
+ return 0;
+
+ struct drm_vc4_wait_seqno wait = {
+ .seqno = seqno,
+ .timeout_ns = timeout_ns,
+ };
+ int ret = drmIoctl(fd, DRM_IOCTL_VC4_WAIT_SEQNO, &wait);
+ if (ret == -1)
+ return -errno;
+ else
+ return 0;
+}
+
bool
-vc4_wait_seqno(struct vc4_screen *screen, uint64_t seqno, uint64_t timeout_ns)
+vc4_wait_seqno(struct vc4_screen *screen, uint64_t seqno, uint64_t timeout_ns,
+ const char *reason)
{
if (screen->finished_seqno >= seqno)
return true;
- struct drm_vc4_wait_seqno wait;
- memset(&wait, 0, sizeof(wait));
- wait.seqno = seqno;
- wait.timeout_ns = timeout_ns;
-
- int ret;
- if (!using_vc4_simulator)
- ret = drmIoctl(screen->fd, DRM_IOCTL_VC4_WAIT_SEQNO, &wait);
- else {
- wait.seqno = screen->finished_seqno;
- ret = 0;
+ if (unlikely(vc4_debug & VC4_DEBUG_PERF) && timeout_ns && reason) {
+ if (vc4_wait_seqno_ioctl(screen->fd, seqno, 0) == -ETIME) {
+ fprintf(stderr, "Blocking on seqno %lld for %s\n",
+ (long long)seqno, reason);
+ }
}
- if (ret == 0) {
- screen->finished_seqno = wait.seqno;
- return true;
- }
+ int ret = vc4_wait_seqno_ioctl(screen->fd, seqno, timeout_ns);
+ if (ret) {
+ if (ret != -ETIME) {
+ fprintf(stderr, "wait failed: %d\n", ret);
+ abort();
+ }
- if (errno != ETIME) {
- fprintf(stderr, "wait failed: %d\n", ret);
- abort();
+ return false;
}
- return false;
+ screen->finished_seqno = seqno;
+ return true;
+}
+
+static int vc4_wait_bo_ioctl(int fd, uint32_t handle, uint64_t timeout_ns)
+{
+ if (using_vc4_simulator)
+ return 0;
+
+ struct drm_vc4_wait_bo wait = {
+ .handle = handle,
+ .timeout_ns = timeout_ns,
+ };
+ int ret = drmIoctl(fd, DRM_IOCTL_VC4_WAIT_BO, &wait);
+ if (ret == -1)
+ return -errno;
+ else
+ return 0;
}
bool
-vc4_bo_wait(struct vc4_bo *bo, uint64_t timeout_ns)
+vc4_bo_wait(struct vc4_bo *bo, uint64_t timeout_ns, const char *reason)
{
struct vc4_screen *screen = bo->screen;
- struct drm_vc4_wait_bo wait;
- memset(&wait, 0, sizeof(wait));
- wait.handle = bo->handle;
- wait.timeout_ns = timeout_ns;
-
- int ret;
- if (!using_vc4_simulator)
- ret = drmIoctl(screen->fd, DRM_IOCTL_VC4_WAIT_BO, &wait);
- else
- ret = 0;
+ if (unlikely(vc4_debug & VC4_DEBUG_PERF) && timeout_ns && reason) {
+ if (vc4_wait_bo_ioctl(screen->fd, bo->handle, 0) == -ETIME) {
+ fprintf(stderr, "Blocking on %s BO for %s\n",
+ bo->name, reason);
+ }
+ }
- if (ret == 0)
- return true;
+ int ret = vc4_wait_bo_ioctl(screen->fd, bo->handle, timeout_ns);
+ if (ret) {
+ if (ret != -ETIME) {
+ fprintf(stderr, "wait failed: %d\n", ret);
+ abort();
+ }
- if (errno != ETIME) {
- fprintf(stderr, "wait failed: %d\n", ret);
- abort();
+ return false;
}
- return false;
+ return true;
}
void *
@@ -515,7 +585,7 @@ vc4_bo_map(struct vc4_bo *bo)
{
void *map = vc4_bo_map_unsynchronized(bo);
- bool ok = vc4_bo_wait(bo, PIPE_TIMEOUT_INFINITE);
+ bool ok = vc4_bo_wait(bo, PIPE_TIMEOUT_INFINITE, "bo map");
if (!ok) {
fprintf(stderr, "BO wait for map failed\n");
abort();
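
Both waits now share the same shape: with VC4_DEBUG_PERF set, a zero-timeout probe detects that the real wait is about to block and names the culprit. Callers annotate the wait with a human-readable reason, as in the vc4_bo_map() hunk above:

bool ok = vc4_bo_wait(bo, PIPE_TIMEOUT_INFINITE, "bo map");

/* With VC4_DEBUG_PERF set, a stall here prints something like:
 *   Blocking on code BO for bo map
 */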
diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.h b/src/gallium/drivers/vc4/vc4_bufmgr.h
index 7320695ca8e..b77506e242a 100644
--- a/src/gallium/drivers/vc4/vc4_bufmgr.h
+++ b/src/gallium/drivers/vc4/vc4_bufmgr.h
@@ -58,8 +58,8 @@ struct vc4_bo {
struct vc4_bo *vc4_bo_alloc(struct vc4_screen *screen, uint32_t size,
const char *name);
-struct vc4_bo *vc4_bo_alloc_mem(struct vc4_screen *screen, const void *data,
- uint32_t size, const char *name);
+struct vc4_bo *vc4_bo_alloc_shader(struct vc4_screen *screen, const void *data,
+ uint32_t size);
void vc4_bo_last_unreference(struct vc4_bo *bo);
void vc4_bo_last_unreference_locked_timed(struct vc4_bo *bo, time_t time);
struct vc4_bo *vc4_bo_open_name(struct vc4_screen *screen, uint32_t name,
@@ -113,10 +113,11 @@ void *
vc4_bo_map_unsynchronized(struct vc4_bo *bo);
bool
-vc4_bo_wait(struct vc4_bo *bo, uint64_t timeout_ns);
+vc4_bo_wait(struct vc4_bo *bo, uint64_t timeout_ns, const char *reason);
bool
-vc4_wait_seqno(struct vc4_screen *screen, uint64_t seqno, uint64_t timeout_ns);
+vc4_wait_seqno(struct vc4_screen *screen, uint64_t seqno, uint64_t timeout_ns,
+ const char *reason);
void
vc4_bufmgr_destroy(struct pipe_screen *pscreen);
diff --git a/src/gallium/drivers/vc4/vc4_cl.c b/src/gallium/drivers/vc4/vc4_cl.c
index 0700e885cbf..ced4f2dfa86 100644
--- a/src/gallium/drivers/vc4/vc4_cl.c
+++ b/src/gallium/drivers/vc4/vc4_cl.c
@@ -36,11 +36,12 @@ vc4_init_cl(struct vc4_context *vc4, struct vc4_cl *cl)
void
cl_ensure_space(struct vc4_cl *cl, uint32_t space)
{
- if ((cl->next - cl->base) + space <= cl->size)
+ uint32_t offset = cl_offset(cl);
+
+ if (offset + space <= cl->size)
return;
uint32_t size = MAX2(cl->size + space, cl->size * 2);
- uint32_t offset = cl->next -cl->base;
cl->base = reralloc(ralloc_parent(cl->base), cl->base, uint8_t, size);
cl->size = size;
@@ -60,15 +61,20 @@ vc4_gem_hindex(struct vc4_context *vc4, struct vc4_bo *bo)
uint32_t hindex;
uint32_t *current_handles = vc4->bo_handles.base;
- for (hindex = 0;
- hindex < (vc4->bo_handles.next - vc4->bo_handles.base) / 4;
- hindex++) {
+ for (hindex = 0; hindex < cl_offset(&vc4->bo_handles) / 4; hindex++) {
if (current_handles[hindex] == bo->handle)
return hindex;
}
- cl_u32(&vc4->bo_handles, bo->handle);
- cl_ptr(&vc4->bo_pointers, vc4_bo_reference(bo));
+ struct vc4_cl_out *out;
+
+ out = cl_start(&vc4->bo_handles);
+ cl_u32(&out, bo->handle);
+ cl_end(&vc4->bo_handles, out);
+
+ out = cl_start(&vc4->bo_pointers);
+ cl_ptr(&out, vc4_bo_reference(bo));
+ cl_end(&vc4->bo_pointers, out);
return hindex;
}
diff --git a/src/gallium/drivers/vc4/vc4_cl.h b/src/gallium/drivers/vc4/vc4_cl.h
index 4a50e790942..bf4be0efc29 100644
--- a/src/gallium/drivers/vc4/vc4_cl.h
+++ b/src/gallium/drivers/vc4/vc4_cl.h
@@ -33,12 +33,20 @@
struct vc4_bo;
+/**
+ * Undefined structure, used for typechecking that you're passing the pointers
+ * to these functions correctly.
+ */
+struct vc4_cl_out;
+
struct vc4_cl {
void *base;
- void *next;
+ struct vc4_cl_out *next;
+ struct vc4_cl_out *reloc_next;
uint32_t size;
- uint32_t reloc_next;
+#ifdef DEBUG
uint32_t reloc_count;
+#endif
};
void vc4_init_cl(struct vc4_context *vc4, struct vc4_cl *cl);
@@ -49,135 +57,149 @@ uint32_t vc4_gem_hindex(struct vc4_context *vc4, struct vc4_bo *bo);
struct PACKED unaligned_16 { uint16_t x; };
struct PACKED unaligned_32 { uint32_t x; };
-static inline void
-put_unaligned_32(void *ptr, uint32_t val)
+static inline uint32_t cl_offset(struct vc4_cl *cl)
{
- struct unaligned_32 *p = ptr;
- p->x = val;
+ return (char *)cl->next - (char *)cl->base;
}
static inline void
-put_unaligned_16(void *ptr, uint16_t val)
+cl_advance(struct vc4_cl_out **cl, uint32_t n)
{
- struct unaligned_16 *p = ptr;
- p->x = val;
+ (*cl) = (struct vc4_cl_out *)((char *)(*cl) + n);
}
-static inline void
-cl_u8(struct vc4_cl *cl, uint8_t n)
+static inline struct vc4_cl_out *
+cl_start(struct vc4_cl *cl)
{
- assert((cl->next - cl->base) + 1 <= cl->size);
-
- *(uint8_t *)cl->next = n;
- cl->next++;
+ return cl->next;
}
static inline void
-cl_u16(struct vc4_cl *cl, uint16_t n)
+cl_end(struct vc4_cl *cl, struct vc4_cl_out *next)
{
- assert((cl->next - cl->base) + 2 <= cl->size);
+ cl->next = next;
+ assert(cl_offset(cl) <= cl->size);
+}
- put_unaligned_16(cl->next, n);
- cl->next += 2;
+
+static inline void
+put_unaligned_32(struct vc4_cl_out *ptr, uint32_t val)
+{
+ struct unaligned_32 *p = (void *)ptr;
+ p->x = val;
}
static inline void
-cl_u32(struct vc4_cl *cl, uint32_t n)
+put_unaligned_16(struct vc4_cl_out *ptr, uint16_t val)
{
- assert((cl->next - cl->base) + 4 <= cl->size);
+ struct unaligned_16 *p = (void *)ptr;
+ p->x = val;
+}
- put_unaligned_32(cl->next, n);
- cl->next += 4;
+static inline void
+cl_u8(struct vc4_cl_out **cl, uint8_t n)
+{
+ *(uint8_t *)(*cl) = n;
+ cl_advance(cl, 1);
}
static inline void
-cl_aligned_u32(struct vc4_cl *cl, uint32_t n)
+cl_u16(struct vc4_cl_out **cl, uint16_t n)
{
- assert((cl->next - cl->base) + 4 <= cl->size);
+ put_unaligned_16(*cl, n);
+ cl_advance(cl, 2);
+}
- *(uint32_t *)cl->next = n;
- cl->next += 4;
+static inline void
+cl_u32(struct vc4_cl_out **cl, uint32_t n)
+{
+ put_unaligned_32(*cl, n);
+ cl_advance(cl, 4);
}
static inline void
-cl_ptr(struct vc4_cl *cl, void *ptr)
+cl_aligned_u32(struct vc4_cl_out **cl, uint32_t n)
{
- assert((cl->next - cl->base) + sizeof(void *) <= cl->size);
+ *(uint32_t *)(*cl) = n;
+ cl_advance(cl, 4);
+}
- *(void **)cl->next = ptr;
- cl->next += sizeof(void *);
+static inline void
+cl_ptr(struct vc4_cl_out **cl, void *ptr)
+{
+ *(struct vc4_cl_out **)(*cl) = ptr;
+ cl_advance(cl, sizeof(void *));
}
static inline void
-cl_f(struct vc4_cl *cl, float f)
+cl_f(struct vc4_cl_out **cl, float f)
{
cl_u32(cl, fui(f));
}
static inline void
-cl_aligned_f(struct vc4_cl *cl, float f)
+cl_aligned_f(struct vc4_cl_out **cl, float f)
{
cl_aligned_u32(cl, fui(f));
}
static inline void
-cl_start_reloc(struct vc4_cl *cl, uint32_t n)
+cl_start_reloc(struct vc4_cl *cl, struct vc4_cl_out **out, uint32_t n)
{
assert(n == 1 || n == 2);
+#ifdef DEBUG
assert(cl->reloc_count == 0);
cl->reloc_count = n;
+#endif
- cl_u8(cl, VC4_PACKET_GEM_HANDLES);
- cl->reloc_next = cl->next - cl->base;
- cl_u32(cl, 0); /* Space where hindex will be written. */
- cl_u32(cl, 0); /* Space where hindex will be written. */
+ cl_u8(out, VC4_PACKET_GEM_HANDLES);
+ cl->reloc_next = *out;
+ cl_u32(out, 0); /* Space where hindex will be written. */
+ cl_u32(out, 0); /* Space where hindex will be written. */
}
-static inline void
+static inline struct vc4_cl_out *
cl_start_shader_reloc(struct vc4_cl *cl, uint32_t n)
{
+#ifdef DEBUG
assert(cl->reloc_count == 0);
cl->reloc_count = n;
- cl->reloc_next = cl->next - cl->base;
+#endif
+ cl->reloc_next = cl->next;
+
+ /* Reserve the space where hindex will be written. */
+ cl_advance(&cl->next, n * 4);
- /* Space where hindex will be written. */
- cl->next += n * 4;
+ return cl->next;
}
static inline void
-cl_reloc_hindex(struct vc4_cl *cl, uint32_t hindex, uint32_t offset)
+cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl, struct vc4_cl_out **cl_out,
+ struct vc4_bo *bo, uint32_t offset)
{
- *(uint32_t *)(cl->base + cl->reloc_next) = hindex;
- cl->reloc_next += 4;
+ *(uint32_t *)cl->reloc_next = vc4_gem_hindex(vc4, bo);
+ cl_advance(&cl->reloc_next, 4);
+#ifdef DEBUG
cl->reloc_count--;
+#endif
- cl_u32(cl, offset);
+ cl_u32(cl_out, offset);
}
static inline void
-cl_aligned_reloc_hindex(struct vc4_cl *cl, uint32_t hindex, uint32_t offset)
+cl_aligned_reloc(struct vc4_context *vc4, struct vc4_cl *cl,
+ struct vc4_cl_out **cl_out,
+ struct vc4_bo *bo, uint32_t offset)
{
- *(uint32_t *)(cl->base + cl->reloc_next) = hindex;
- cl->reloc_next += 4;
+ *(uint32_t *)cl->reloc_next = vc4_gem_hindex(vc4, bo);
+ cl_advance(&cl->reloc_next, 4);
+#ifdef DEBUG
cl->reloc_count--;
+#endif
- cl_aligned_u32(cl, offset);
-}
-
-static inline void
-cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl,
- struct vc4_bo *bo, uint32_t offset)
-{
- cl_reloc_hindex(cl, vc4_gem_hindex(vc4, bo), offset);
-}
-
-static inline void
-cl_aligned_reloc(struct vc4_context *vc4, struct vc4_cl *cl,
- struct vc4_bo *bo, uint32_t offset)
-{
- cl_aligned_reloc_hindex(cl, vc4_gem_hindex(vc4, bo), offset);
+ cl_aligned_u32(cl_out, offset);
}
void cl_ensure_space(struct vc4_cl *cl, uint32_t size);
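
The new API splits every command-list write into cl_start() (snapshot the write pointer into a local), a run of cl_u8()/cl_u16()/cl_u32() calls that advance only the local, and cl_end() (commit the pointer and assert it stayed in bounds). This keeps the hot pointer in a register instead of reloading vc4->bcl.next on every store. The canonical pattern, used throughout the hunks below:

struct vc4_cl_out *bcl = cl_start(&vc4->bcl);   /* snapshot cl->next */
cl_u8(&bcl, VC4_PACKET_FLUSH);                  /* advances only the local */
cl_end(&vc4->bcl, bcl);                         /* commit + bounds assert */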
diff --git a/src/gallium/drivers/vc4/vc4_cl_dump.c b/src/gallium/drivers/vc4/vc4_cl_dump.c
index 69055081daa..6d748010baf 100644
--- a/src/gallium/drivers/vc4/vc4_cl_dump.c
+++ b/src/gallium/drivers/vc4/vc4_cl_dump.c
@@ -34,7 +34,7 @@ dump_float(void *cl, uint32_t offset, uint32_t hw_offset)
void *f = cl + offset;
fprintf(stderr, "0x%08x 0x%08x: %f (0x%08x)\n",
- offset, hw_offset, *(float *)f, *(uint32_t *)f);
+ offset, hw_offset, uif(*(uint32_t *)f), *(uint32_t *)f);
}
static void
@@ -47,7 +47,33 @@ dump_VC4_PACKET_BRANCH_TO_SUB_LIST(void *cl, uint32_t offset, uint32_t hw_offset
}
static void
-dump_VC4_PACKET_STORE_TILE_BUFFER_GENERAL(void *cl, uint32_t offset, uint32_t hw_offset)
+dump_loadstore_full(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+ uint32_t bits = *(uint32_t *)(cl + offset);
+
+ fprintf(stderr, "0x%08x 0x%08x: addr 0x%08x%s%s%s%s\n",
+ offset, hw_offset,
+ bits & ~0xf,
+ (bits & VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL) ? "" : " clear",
+ (bits & VC4_LOADSTORE_FULL_RES_DISABLE_ZS) ? "" : " zs",
+ (bits & VC4_LOADSTORE_FULL_RES_DISABLE_COLOR) ? "" : " color",
+ (bits & VC4_LOADSTORE_FULL_RES_EOF) ? " eof" : "");
+}
+
+static void
+dump_VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+ dump_loadstore_full(cl, offset, hw_offset);
+}
+
+static void
+dump_VC4_PACKET_STORE_FULL_RES_TILE_BUFFER(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+ dump_loadstore_full(cl, offset, hw_offset);
+}
+
+static void
+dump_loadstore_general(void *cl, uint32_t offset, uint32_t hw_offset)
{
uint8_t *bytes = cl + offset;
uint32_t *addr = cl + offset + 2;
@@ -125,6 +151,18 @@ dump_VC4_PACKET_STORE_TILE_BUFFER_GENERAL(void *cl, uint32_t offset, uint32_t hw
}
static void
+dump_VC4_PACKET_STORE_TILE_BUFFER_GENERAL(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+ dump_loadstore_general(cl, offset, hw_offset);
+}
+
+static void
+dump_VC4_PACKET_LOAD_TILE_BUFFER_GENERAL(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+ dump_loadstore_general(cl, offset, hw_offset);
+}
+
+static void
dump_VC4_PACKET_FLAT_SHADE_FLAGS(void *cl, uint32_t offset, uint32_t hw_offset)
{
uint32_t *bits = cl + offset;
@@ -291,63 +329,63 @@ dump_VC4_PACKET_GEM_HANDLES(void *cl, uint32_t offset, uint32_t hw_offset)
offset, hw_offset, handles[0], handles[1]);
}
-#define PACKET_DUMP(name, size) [name] = { #name, size, dump_##name }
-#define PACKET(name, size) [name] = { #name, size, NULL }
+#define PACKET_DUMP(name) [name] = { #name, name ## _SIZE, dump_##name }
+#define PACKET(name) [name] = { #name, name ## _SIZE, NULL }
static const struct packet_info {
const char *name;
uint8_t size;
void (*dump_func)(void *cl, uint32_t offset, uint32_t hw_offset);
} packet_info[] = {
- PACKET(VC4_PACKET_HALT, 1),
- PACKET(VC4_PACKET_NOP, 1),
-
- PACKET(VC4_PACKET_FLUSH, 1),
- PACKET(VC4_PACKET_FLUSH_ALL, 1),
- PACKET(VC4_PACKET_START_TILE_BINNING, 1),
- PACKET(VC4_PACKET_INCREMENT_SEMAPHORE, 1),
- PACKET(VC4_PACKET_WAIT_ON_SEMAPHORE, 1),
-
- PACKET(VC4_PACKET_BRANCH, 5),
- PACKET_DUMP(VC4_PACKET_BRANCH_TO_SUB_LIST, 5),
-
- PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER, 1),
- PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF, 1),
- PACKET(VC4_PACKET_STORE_FULL_RES_TILE_BUFFER, 5),
- PACKET(VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER, 5),
- PACKET_DUMP(VC4_PACKET_STORE_TILE_BUFFER_GENERAL, 7),
- PACKET(VC4_PACKET_LOAD_TILE_BUFFER_GENERAL, 7),
-
- PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE, 14),
- PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE, 10),
-
- PACKET(VC4_PACKET_COMPRESSED_PRIMITIVE, 48),
- PACKET(VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE, 49),
-
- PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, 2),
-
- PACKET(VC4_PACKET_GL_SHADER_STATE, 5),
- PACKET(VC4_PACKET_NV_SHADER_STATE, 5),
- PACKET(VC4_PACKET_VG_SHADER_STATE, 5),
-
- PACKET(VC4_PACKET_CONFIGURATION_BITS, 4),
- PACKET_DUMP(VC4_PACKET_FLAT_SHADE_FLAGS, 5),
- PACKET_DUMP(VC4_PACKET_POINT_SIZE, 5),
- PACKET_DUMP(VC4_PACKET_LINE_WIDTH, 5),
- PACKET(VC4_PACKET_RHT_X_BOUNDARY, 3),
- PACKET(VC4_PACKET_DEPTH_OFFSET, 5),
- PACKET(VC4_PACKET_CLIP_WINDOW, 9),
- PACKET_DUMP(VC4_PACKET_VIEWPORT_OFFSET, 5),
- PACKET(VC4_PACKET_Z_CLIPPING, 9),
- PACKET_DUMP(VC4_PACKET_CLIPPER_XY_SCALING, 9),
- PACKET_DUMP(VC4_PACKET_CLIPPER_Z_SCALING, 9),
-
- PACKET_DUMP(VC4_PACKET_TILE_BINNING_MODE_CONFIG, 16),
- PACKET_DUMP(VC4_PACKET_TILE_RENDERING_MODE_CONFIG, 11),
- PACKET(VC4_PACKET_CLEAR_COLORS, 14),
- PACKET_DUMP(VC4_PACKET_TILE_COORDINATES, 3),
-
- PACKET_DUMP(VC4_PACKET_GEM_HANDLES, 9),
+ PACKET(VC4_PACKET_HALT),
+ PACKET(VC4_PACKET_NOP),
+
+ PACKET(VC4_PACKET_FLUSH),
+ PACKET(VC4_PACKET_FLUSH_ALL),
+ PACKET(VC4_PACKET_START_TILE_BINNING),
+ PACKET(VC4_PACKET_INCREMENT_SEMAPHORE),
+ PACKET(VC4_PACKET_WAIT_ON_SEMAPHORE),
+
+ PACKET(VC4_PACKET_BRANCH),
+ PACKET_DUMP(VC4_PACKET_BRANCH_TO_SUB_LIST),
+
+ PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER),
+ PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF),
+ PACKET_DUMP(VC4_PACKET_STORE_FULL_RES_TILE_BUFFER),
+ PACKET_DUMP(VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER),
+ PACKET_DUMP(VC4_PACKET_STORE_TILE_BUFFER_GENERAL),
+ PACKET_DUMP(VC4_PACKET_LOAD_TILE_BUFFER_GENERAL),
+
+ PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE),
+ PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE),
+
+ PACKET(VC4_PACKET_COMPRESSED_PRIMITIVE),
+ PACKET(VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE),
+
+ PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT),
+
+ PACKET(VC4_PACKET_GL_SHADER_STATE),
+ PACKET(VC4_PACKET_NV_SHADER_STATE),
+ PACKET(VC4_PACKET_VG_SHADER_STATE),
+
+ PACKET(VC4_PACKET_CONFIGURATION_BITS),
+ PACKET_DUMP(VC4_PACKET_FLAT_SHADE_FLAGS),
+ PACKET_DUMP(VC4_PACKET_POINT_SIZE),
+ PACKET_DUMP(VC4_PACKET_LINE_WIDTH),
+ PACKET(VC4_PACKET_RHT_X_BOUNDARY),
+ PACKET(VC4_PACKET_DEPTH_OFFSET),
+ PACKET(VC4_PACKET_CLIP_WINDOW),
+ PACKET_DUMP(VC4_PACKET_VIEWPORT_OFFSET),
+ PACKET(VC4_PACKET_Z_CLIPPING),
+ PACKET_DUMP(VC4_PACKET_CLIPPER_XY_SCALING),
+ PACKET_DUMP(VC4_PACKET_CLIPPER_Z_SCALING),
+
+ PACKET_DUMP(VC4_PACKET_TILE_BINNING_MODE_CONFIG),
+ PACKET_DUMP(VC4_PACKET_TILE_RENDERING_MODE_CONFIG),
+ PACKET(VC4_PACKET_CLEAR_COLORS),
+ PACKET_DUMP(VC4_PACKET_TILE_COORDINATES),
+
+ PACKET_DUMP(VC4_PACKET_GEM_HANDLES),
};
void
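
The token-pasting macros tie each table entry to the *_SIZE define from vc4_packet.h, so a size can no longer drift out of sync with the header. For example:

PACKET_DUMP(VC4_PACKET_TILE_COORDINATES)
/* expands to: */
[VC4_PACKET_TILE_COORDINATES] = {
        "VC4_PACKET_TILE_COORDINATES",
        VC4_PACKET_TILE_COORDINATES_SIZE,  /* 3 */
        dump_VC4_PACKET_TILE_COORDINATES,
},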
@@ -359,7 +397,7 @@ vc4_dump_cl(void *cl, uint32_t size, bool is_render)
while (offset < size) {
uint8_t header = cmds[offset];
- if (header > ARRAY_SIZE(packet_info) ||
+ if (header >= ARRAY_SIZE(packet_info) ||
!packet_info[header].name) {
fprintf(stderr, "0x%08x 0x%08x: Unknown packet 0x%02x (%d)!\n",
offset, hw_offset, header, header);
diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c
index 630f8e68896..fff63158c9d 100644
--- a/src/gallium/drivers/vc4/vc4_context.c
+++ b/src/gallium/drivers/vc4/vc4_context.c
@@ -61,9 +61,11 @@ vc4_flush(struct pipe_context *pctx)
* FLUSH completes.
*/
cl_ensure_space(&vc4->bcl, 8);
- cl_u8(&vc4->bcl, VC4_PACKET_INCREMENT_SEMAPHORE);
+ struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
+ cl_u8(&bcl, VC4_PACKET_INCREMENT_SEMAPHORE);
/* The FLUSH caps all of our bin lists with a VC4_PACKET_RETURN. */
- cl_u8(&vc4->bcl, VC4_PACKET_FLUSH);
+ cl_u8(&bcl, VC4_PACKET_FLUSH);
+ cl_end(&vc4->bcl, bcl);
if (cbuf && (vc4->resolve & PIPE_CLEAR_COLOR0)) {
pipe_surface_reference(&vc4->color_write, cbuf);
@@ -103,8 +105,10 @@ vc4_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
vc4_flush(pctx);
if (fence) {
+ struct pipe_screen *screen = pctx->screen;
struct vc4_fence *f = vc4_fence_create(vc4->screen,
vc4->last_emit_seqno);
+ screen->fence_reference(screen, fence, NULL);
*fence = (struct pipe_fence_handle *)f;
}
}
@@ -126,8 +130,7 @@ vc4_cl_references_bo(struct pipe_context *pctx, struct vc4_bo *bo)
* they match.
*/
struct vc4_bo **referenced_bos = vc4->bo_pointers.base;
- for (int i = 0; i < (vc4->bo_handles.next -
- vc4->bo_handles.base) / 4; i++) {
+ for (int i = 0; i < cl_offset(&vc4->bo_handles) / 4; i++) {
if (referenced_bos[i] == bo) {
return true;
}
diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h
index d5d6be16f6e..654c46f3c0d 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -67,7 +67,20 @@
#define VC4_DIRTY_CLIP (1 << 20)
#define VC4_DIRTY_UNCOMPILED_VS (1 << 21)
#define VC4_DIRTY_UNCOMPILED_FS (1 << 22)
-#define VC4_DIRTY_COMPILED_FS (1 << 24)
+#define VC4_DIRTY_COMPILED_CS (1 << 23)
+#define VC4_DIRTY_COMPILED_VS (1 << 24)
+#define VC4_DIRTY_COMPILED_FS (1 << 25)
+
+struct vc4_sampler_view {
+ struct pipe_sampler_view base;
+ uint32_t texture_p0;
+ uint32_t texture_p1;
+};
+
+struct vc4_sampler_state {
+ struct pipe_sampler_state base;
+ uint32_t texture_p1;
+};
struct vc4_texture_stateobj {
struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS];
@@ -121,6 +134,12 @@ struct vc4_compiled_shader {
struct vc4_ubo_range *ubo_ranges;
uint32_t num_ubo_ranges;
uint32_t ubo_size;
+ /**
+ * VC4_DIRTY_* flags that, when set in vc4->dirty, mean that the
+ * uniforms have to be rewritten (and therefore the shader state
+ * reemitted).
+ */
+ uint32_t uniform_dirty_bits;
/** bitmask of which inputs are color inputs, for flat shade handling. */
uint32_t color_inputs;
@@ -238,6 +257,11 @@ struct vc4_context {
*/
bool draw_call_queued;
+ /** Maximum index buffer valid for the current shader_rec. */
+ uint32_t max_index;
+ /** Last index bias baked into the current shader_rec. */
+ uint32_t last_index_bias;
+
struct primconvert_context *primconvert;
struct hash_table *fs_cache, *vs_cache;
@@ -246,6 +270,7 @@ struct vc4_context {
struct ra_regs *regs;
unsigned int reg_class_any;
+ unsigned int reg_class_r4_or_a;
unsigned int reg_class_a;
uint8_t prim_mode;
@@ -326,6 +351,18 @@ vc4_context(struct pipe_context *pcontext)
return (struct vc4_context *)pcontext;
}
+static inline struct vc4_sampler_view *
+vc4_sampler_view(struct pipe_sampler_view *psview)
+{
+ return (struct vc4_sampler_view *)psview;
+}
+
+static inline struct vc4_sampler_state *
+vc4_sampler_state(struct pipe_sampler_state *psampler)
+{
+ return (struct vc4_sampler_state *)psampler;
+}
+
struct pipe_context *vc4_context_create(struct pipe_screen *pscreen,
void *priv);
void vc4_draw_init(struct pipe_context *pctx);
@@ -337,6 +374,7 @@ void vc4_simulator_init(struct vc4_screen *screen);
int vc4_simulator_flush(struct vc4_context *vc4,
struct drm_vc4_submit_cl *args);
+void vc4_set_shader_uniform_dirty_flags(struct vc4_compiled_shader *shader);
void vc4_write_uniforms(struct vc4_context *vc4,
struct vc4_compiled_shader *shader,
struct vc4_constbuf_stateobj *cb,
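
The new subclasses let the driver precompute the hardware's texture P0/P1 words when the sampler-view and sampler-state CSOs are created, instead of rebuilding them on every uniform upload; the inline helpers make the downcast explicit. A usage sketch (the exact merge of view and sampler P1 bits is an assumption; this patch only shows the cached fields):

struct vc4_sampler_view *sview = vc4_sampler_view(psview);
struct vc4_sampler_state *sampler = vc4_sampler_state(psampler);

/* Precomputed at CSO creation; assumed to be OR-combined at upload time. */
uint32_t p0 = sview->texture_p0;
uint32_t p1 = sview->texture_p1 | sampler->texture_p1;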
diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c
index 5e6d70d6f33..a4e5e092b1a 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -71,37 +71,40 @@ vc4_start_draw(struct vc4_context *vc4)
uint32_t height = vc4->framebuffer.height;
uint32_t tilew = align(width, 64) / 64;
uint32_t tileh = align(height, 64) / 64;
+ struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
// Tile state data is 48 bytes per tile, I think it can be thrown away
// as soon as binning is finished.
- cl_u8(&vc4->bcl, VC4_PACKET_TILE_BINNING_MODE_CONFIG);
- cl_u32(&vc4->bcl, 0); /* tile alloc addr, filled by kernel */
- cl_u32(&vc4->bcl, 0); /* tile alloc size, filled by kernel */
- cl_u32(&vc4->bcl, 0); /* tile state addr, filled by kernel */
- cl_u8(&vc4->bcl, tilew);
- cl_u8(&vc4->bcl, tileh);
- cl_u8(&vc4->bcl, 0); /* flags, filled by kernel. */
+ cl_u8(&bcl, VC4_PACKET_TILE_BINNING_MODE_CONFIG);
+ cl_u32(&bcl, 0); /* tile alloc addr, filled by kernel */
+ cl_u32(&bcl, 0); /* tile alloc size, filled by kernel */
+ cl_u32(&bcl, 0); /* tile state addr, filled by kernel */
+ cl_u8(&bcl, tilew);
+ cl_u8(&bcl, tileh);
+ cl_u8(&bcl, 0); /* flags, filled by kernel. */
/* START_TILE_BINNING resets the statechange counters in the hardware,
* which are what is used when a primitive is binned to a tile to
* figure out what new state packets need to be written to that tile's
* command list.
*/
- cl_u8(&vc4->bcl, VC4_PACKET_START_TILE_BINNING);
+ cl_u8(&bcl, VC4_PACKET_START_TILE_BINNING);
/* Reset the current compressed primitives format. This gets modified
* by VC4_PACKET_GL_INDEXED_PRIMITIVE and
* VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start
* of every tile.
*/
- cl_u8(&vc4->bcl, VC4_PACKET_PRIMITIVE_LIST_FORMAT);
- cl_u8(&vc4->bcl, (VC4_PRIMITIVE_LIST_FORMAT_16_INDEX |
- VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES));
+ cl_u8(&bcl, VC4_PACKET_PRIMITIVE_LIST_FORMAT);
+ cl_u8(&bcl, (VC4_PRIMITIVE_LIST_FORMAT_16_INDEX |
+ VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES));
vc4->needs_flush = true;
vc4->draw_call_queued = true;
vc4->draw_width = width;
vc4->draw_height = height;
+
+ cl_end(&vc4->bcl, bcl);
}
static void
@@ -119,96 +122,67 @@ vc4_update_shadow_textures(struct pipe_context *pctx,
}
static void
-vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
+vc4_emit_gl_shader_state(struct vc4_context *vc4, const struct pipe_draw_info *info)
{
- struct vc4_context *vc4 = vc4_context(pctx);
-
- if (info->mode >= PIPE_PRIM_QUADS) {
- util_primconvert_save_index_buffer(vc4->primconvert, &vc4->indexbuf);
- util_primconvert_save_rasterizer_state(vc4->primconvert, &vc4->rasterizer->base);
- util_primconvert_draw_vbo(vc4->primconvert, info);
- perf_debug("Fallback conversion for %d %s vertices\n",
- info->count, u_prim_name(info->mode));
- return;
- }
-
- /* Before setting up the draw, do any fixup blits necessary. */
- vc4_update_shadow_textures(pctx, &vc4->verttex);
- vc4_update_shadow_textures(pctx, &vc4->fragtex);
-
- vc4_get_draw_cl_space(vc4);
-
+ /* VC4_DIRTY_VTXSTATE */
struct vc4_vertex_stateobj *vtx = vc4->vtx;
+ /* VC4_DIRTY_VTXBUF */
struct vc4_vertexbuf_stateobj *vertexbuf = &vc4->vertexbuf;
- if (vc4->prim_mode != info->mode) {
- vc4->prim_mode = info->mode;
- vc4->dirty |= VC4_DIRTY_PRIM_MODE;
- }
-
- vc4_start_draw(vc4);
- vc4_update_compiled_shaders(vc4, info->mode);
-
- vc4_emit_state(pctx);
- vc4->dirty = 0;
-
- vc4_write_uniforms(vc4, vc4->prog.fs,
- &vc4->constbuf[PIPE_SHADER_FRAGMENT],
- &vc4->fragtex);
- vc4_write_uniforms(vc4, vc4->prog.vs,
- &vc4->constbuf[PIPE_SHADER_VERTEX],
- &vc4->verttex);
- vc4_write_uniforms(vc4, vc4->prog.cs,
- &vc4->constbuf[PIPE_SHADER_VERTEX],
- &vc4->verttex);
-
/* The simulator throws a fit if VS or CS don't read an attribute, so
* we emit a dummy read.
*/
uint32_t num_elements_emit = MAX2(vtx->num_elements, 1);
/* Emit the shader record. */
- cl_start_shader_reloc(&vc4->shader_rec, 3 + num_elements_emit);
- cl_u16(&vc4->shader_rec,
+ struct vc4_cl_out *shader_rec =
+ cl_start_shader_reloc(&vc4->shader_rec, 3 + num_elements_emit);
+ /* VC4_DIRTY_PRIM_MODE | VC4_DIRTY_RASTERIZER */
+ cl_u16(&shader_rec,
VC4_SHADER_FLAG_ENABLE_CLIPPING |
+ VC4_SHADER_FLAG_FS_SINGLE_THREAD |
((info->mode == PIPE_PRIM_POINTS &&
vc4->rasterizer->base.point_size_per_vertex) ?
VC4_SHADER_FLAG_VS_POINT_SIZE : 0));
- cl_u8(&vc4->shader_rec, 0); /* fs num uniforms (unused) */
- cl_u8(&vc4->shader_rec, vc4->prog.fs->num_inputs);
- cl_reloc(vc4, &vc4->shader_rec, vc4->prog.fs->bo, 0);
- cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
-
- cl_u16(&vc4->shader_rec, 0); /* vs num uniforms */
- cl_u8(&vc4->shader_rec, vc4->prog.vs->vattrs_live);
- cl_u8(&vc4->shader_rec, vc4->prog.vs->vattr_offsets[8]);
- cl_reloc(vc4, &vc4->shader_rec, vc4->prog.vs->bo, 0);
- cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
-
- cl_u16(&vc4->shader_rec, 0); /* cs num uniforms */
- cl_u8(&vc4->shader_rec, vc4->prog.cs->vattrs_live);
- cl_u8(&vc4->shader_rec, vc4->prog.cs->vattr_offsets[8]);
- cl_reloc(vc4, &vc4->shader_rec, vc4->prog.cs->bo, 0);
- cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
+
+ /* VC4_DIRTY_COMPILED_FS */
+ cl_u8(&shader_rec, 0); /* fs num uniforms (unused) */
+ cl_u8(&shader_rec, vc4->prog.fs->num_inputs);
+ cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.fs->bo, 0);
+ cl_u32(&shader_rec, 0); /* UBO offset written by kernel */
+
+ /* VC4_DIRTY_COMPILED_VS */
+ cl_u16(&shader_rec, 0); /* vs num uniforms */
+ cl_u8(&shader_rec, vc4->prog.vs->vattrs_live);
+ cl_u8(&shader_rec, vc4->prog.vs->vattr_offsets[8]);
+ cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.vs->bo, 0);
+ cl_u32(&shader_rec, 0); /* UBO offset written by kernel */
+
+ /* VC4_DIRTY_COMPILED_CS */
+ cl_u16(&shader_rec, 0); /* cs num uniforms */
+ cl_u8(&shader_rec, vc4->prog.cs->vattrs_live);
+ cl_u8(&shader_rec, vc4->prog.cs->vattr_offsets[8]);
+ cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.cs->bo, 0);
+ cl_u32(&shader_rec, 0); /* UBO offset written by kernel */
uint32_t max_index = 0xffff;
- uint32_t vpm_offset = 0;
for (int i = 0; i < vtx->num_elements; i++) {
struct pipe_vertex_element *elem = &vtx->pipe[i];
struct pipe_vertex_buffer *vb =
&vertexbuf->vb[elem->vertex_buffer_index];
struct vc4_resource *rsc = vc4_resource(vb->buffer);
- uint32_t offset = vb->buffer_offset + elem->src_offset;
+ /* index_bias isn't tracked in vc4->dirty; see vc4->last_index_bias. */
+ uint32_t offset = (vb->buffer_offset +
+ elem->src_offset +
+ vb->stride * info->index_bias);
uint32_t vb_size = rsc->bo->size - offset;
uint32_t elem_size =
util_format_get_blocksize(elem->src_format);
- cl_reloc(vc4, &vc4->shader_rec, rsc->bo, offset);
- cl_u8(&vc4->shader_rec, elem_size - 1);
- cl_u8(&vc4->shader_rec, vb->stride);
- cl_u8(&vc4->shader_rec, vc4->prog.vs->vattr_offsets[i]);
- cl_u8(&vc4->shader_rec, vc4->prog.cs->vattr_offsets[i]);
-
- vpm_offset += align(elem_size, 4);
+ cl_reloc(vc4, &vc4->shader_rec, &shader_rec, rsc->bo, offset);
+ cl_u8(&shader_rec, elem_size - 1);
+ cl_u8(&shader_rec, vb->stride);
+ cl_u8(&shader_rec, vc4->prog.vs->vattr_offsets[i]);
+ cl_u8(&shader_rec, vc4->prog.cs->vattr_offsets[i]);
if (vb->stride > 0) {
max_index = MIN2(max_index,
@@ -219,25 +193,89 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
if (vtx->num_elements == 0) {
assert(num_elements_emit == 1);
struct vc4_bo *bo = vc4_bo_alloc(vc4->screen, 4096, "scratch VBO");
- cl_reloc(vc4, &vc4->shader_rec, bo, 0);
- cl_u8(&vc4->shader_rec, 16 - 1); /* element size */
- cl_u8(&vc4->shader_rec, 0); /* stride */
- cl_u8(&vc4->shader_rec, 0); /* VS VPM offset */
- cl_u8(&vc4->shader_rec, 0); /* CS VPM offset */
+ cl_reloc(vc4, &vc4->shader_rec, &shader_rec, bo, 0);
+ cl_u8(&shader_rec, 16 - 1); /* element size */
+ cl_u8(&shader_rec, 0); /* stride */
+ cl_u8(&shader_rec, 0); /* VS VPM offset */
+ cl_u8(&shader_rec, 0); /* CS VPM offset */
vc4_bo_unreference(&bo);
}
+ cl_end(&vc4->shader_rec, shader_rec);
+ struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
/* the actual draw call. */
- cl_u8(&vc4->bcl, VC4_PACKET_GL_SHADER_STATE);
+ cl_u8(&bcl, VC4_PACKET_GL_SHADER_STATE);
assert(vtx->num_elements <= 8);
/* Note that number of attributes == 0 in the packet means 8
* attributes. This field also contains the offset into shader_rec.
*/
- cl_u32(&vc4->bcl, num_elements_emit & 0x7);
+ cl_u32(&bcl, num_elements_emit & 0x7);
+ cl_end(&vc4->bcl, bcl);
+
+ vc4_write_uniforms(vc4, vc4->prog.fs,
+ &vc4->constbuf[PIPE_SHADER_FRAGMENT],
+ &vc4->fragtex);
+ vc4_write_uniforms(vc4, vc4->prog.vs,
+ &vc4->constbuf[PIPE_SHADER_VERTEX],
+ &vc4->verttex);
+ vc4_write_uniforms(vc4, vc4->prog.cs,
+ &vc4->constbuf[PIPE_SHADER_VERTEX],
+ &vc4->verttex);
+
+ vc4->last_index_bias = info->index_bias;
+ vc4->max_index = max_index;
+}
+
+static void
+vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
+{
+ struct vc4_context *vc4 = vc4_context(pctx);
+
+ if (info->mode >= PIPE_PRIM_QUADS) {
+ util_primconvert_save_index_buffer(vc4->primconvert, &vc4->indexbuf);
+ util_primconvert_save_rasterizer_state(vc4->primconvert, &vc4->rasterizer->base);
+ util_primconvert_draw_vbo(vc4->primconvert, info);
+ perf_debug("Fallback conversion for %d %s vertices\n",
+ info->count, u_prim_name(info->mode));
+ return;
+ }
+
+ /* Before setting up the draw, do any fixup blits necessary. */
+ vc4_update_shadow_textures(pctx, &vc4->verttex);
+ vc4_update_shadow_textures(pctx, &vc4->fragtex);
+
+ vc4_get_draw_cl_space(vc4);
+
+ if (vc4->prim_mode != info->mode) {
+ vc4->prim_mode = info->mode;
+ vc4->dirty |= VC4_DIRTY_PRIM_MODE;
+ }
+
+ vc4_start_draw(vc4);
+ vc4_update_compiled_shaders(vc4, info->mode);
+
+ vc4_emit_state(pctx);
+
+ if ((vc4->dirty & (VC4_DIRTY_VTXBUF |
+ VC4_DIRTY_VTXSTATE |
+ VC4_DIRTY_PRIM_MODE |
+ VC4_DIRTY_RASTERIZER |
+ VC4_DIRTY_COMPILED_CS |
+ VC4_DIRTY_COMPILED_VS |
+ VC4_DIRTY_COMPILED_FS |
+ vc4->prog.cs->uniform_dirty_bits |
+ vc4->prog.vs->uniform_dirty_bits |
+ vc4->prog.fs->uniform_dirty_bits)) ||
+ vc4->last_index_bias != info->index_bias) {
+ vc4_emit_gl_shader_state(vc4, info);
+ }
+
+ vc4->dirty = 0;
/* Note that the primitive type fields match with OpenGL/gallium
* definitions, up to but not including QUADS.
*/
+ struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
if (info->indexed) {
uint32_t offset = vc4->indexbuf.offset;
uint32_t index_size = vc4->indexbuf.index_size;
@@ -251,25 +289,26 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
}
struct vc4_resource *rsc = vc4_resource(prsc);
- cl_start_reloc(&vc4->bcl, 1);
- cl_u8(&vc4->bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
- cl_u8(&vc4->bcl,
+ cl_start_reloc(&vc4->bcl, &bcl, 1);
+ cl_u8(&bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
+ cl_u8(&bcl,
info->mode |
(index_size == 2 ?
VC4_INDEX_BUFFER_U16:
VC4_INDEX_BUFFER_U8));
- cl_u32(&vc4->bcl, info->count);
- cl_reloc(vc4, &vc4->bcl, rsc->bo, offset);
- cl_u32(&vc4->bcl, max_index);
+ cl_u32(&bcl, info->count);
+ cl_reloc(vc4, &vc4->bcl, &bcl, rsc->bo, offset);
+ cl_u32(&bcl, vc4->max_index);
if (vc4->indexbuf.index_size == 4)
pipe_resource_reference(&prsc, NULL);
} else {
- cl_u8(&vc4->bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
- cl_u8(&vc4->bcl, info->mode);
- cl_u32(&vc4->bcl, info->count);
- cl_u32(&vc4->bcl, info->start);
+ cl_u8(&bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
+ cl_u8(&bcl, info->mode);
+ cl_u32(&bcl, info->count);
+ cl_u32(&bcl, info->start);
}
+ cl_end(&vc4->bcl, bcl);
if (vc4->zsa && vc4->zsa->base.depth.enabled) {
vc4->resolve |= PIPE_CLEAR_DEPTH;
diff --git a/src/gallium/drivers/vc4/vc4_drm.h b/src/gallium/drivers/vc4/vc4_drm.h
index 5f1ee4fa125..863ef8da8fb 100644
--- a/src/gallium/drivers/vc4/vc4_drm.h
+++ b/src/gallium/drivers/vc4/vc4_drm.h
@@ -31,12 +31,14 @@
#define DRM_VC4_WAIT_BO 0x02
#define DRM_VC4_CREATE_BO 0x03
#define DRM_VC4_MMAP_BO 0x04
+#define DRM_VC4_CREATE_SHADER_BO 0x05
#define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl)
#define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno)
#define DRM_IOCTL_VC4_WAIT_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_WAIT_BO, struct drm_vc4_wait_bo)
#define DRM_IOCTL_VC4_CREATE_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo)
#define DRM_IOCTL_VC4_MMAP_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo)
+#define DRM_IOCTL_VC4_CREATE_SHADER_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo)
struct drm_vc4_submit_rcl_surface {
uint32_t hindex; /* Handle index, or ~0 if not present. */
@@ -183,6 +185,29 @@ struct drm_vc4_create_bo {
};
/**
+ * struct drm_vc4_create_shader_bo - ioctl argument for creating VC4
+ * shader BOs.
+ *
+ * Since allowing a shader to be overwritten while it is also being
+ * executed would allow privilege escalation, shaders must be
+ * created using this ioctl, and they can't be mmapped later.
+ */
+struct drm_vc4_create_shader_bo {
+ /* Size of the data argument. */
+ uint32_t size;
+ /* Flags, currently must be 0. */
+ uint32_t flags;
+
+ /* Pointer to the data. */
+ uint64_t data;
+
+ /* Returned GEM handle for the BO. */
+ uint32_t handle;
+ /* Pad, must be 0. */
+ uint32_t pad;
+};
+
+/**
* struct drm_vc4_mmap_bo - ioctl argument for mapping VC4 BOs.
*
* This doesn't actually perform an mmap. Instead, it returns the
diff --git a/src/gallium/drivers/vc4/vc4_emit.c b/src/gallium/drivers/vc4/vc4_emit.c
index d2b54fccf91..ba064ff889b 100644
--- a/src/gallium/drivers/vc4/vc4_emit.c
+++ b/src/gallium/drivers/vc4/vc4_emit.c
@@ -28,23 +28,24 @@ vc4_emit_state(struct pipe_context *pctx)
{
struct vc4_context *vc4 = vc4_context(pctx);
+ struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
if (vc4->dirty & (VC4_DIRTY_SCISSOR | VC4_DIRTY_VIEWPORT)) {
float *vpscale = vc4->viewport.scale;
float *vptranslate = vc4->viewport.translate;
- float vp_minx = -fabs(vpscale[0]) + vptranslate[0];
- float vp_maxx = fabs(vpscale[0]) + vptranslate[0];
- float vp_miny = -fabs(vpscale[1]) + vptranslate[1];
- float vp_maxy = fabs(vpscale[1]) + vptranslate[1];
+ float vp_minx = -fabsf(vpscale[0]) + vptranslate[0];
+ float vp_maxx = fabsf(vpscale[0]) + vptranslate[0];
+ float vp_miny = -fabsf(vpscale[1]) + vptranslate[1];
+ float vp_maxy = fabsf(vpscale[1]) + vptranslate[1];
uint32_t minx = MAX2(vc4->scissor.minx, vp_minx);
uint32_t miny = MAX2(vc4->scissor.miny, vp_miny);
uint32_t maxx = MIN2(vc4->scissor.maxx, vp_maxx);
uint32_t maxy = MIN2(vc4->scissor.maxy, vp_maxy);
- cl_u8(&vc4->bcl, VC4_PACKET_CLIP_WINDOW);
- cl_u16(&vc4->bcl, minx);
- cl_u16(&vc4->bcl, miny);
- cl_u16(&vc4->bcl, maxx - minx);
- cl_u16(&vc4->bcl, maxy - miny);
+ cl_u8(&bcl, VC4_PACKET_CLIP_WINDOW);
+ cl_u16(&bcl, minx);
+ cl_u16(&bcl, miny);
+ cl_u16(&bcl, maxx - minx);
+ cl_u16(&bcl, maxy - miny);
vc4->draw_min_x = MIN2(vc4->draw_min_x, minx);
vc4->draw_min_y = MIN2(vc4->draw_min_y, miny);
@@ -53,47 +54,49 @@ vc4_emit_state(struct pipe_context *pctx)
}
if (vc4->dirty & (VC4_DIRTY_RASTERIZER | VC4_DIRTY_ZSA)) {
- cl_u8(&vc4->bcl, VC4_PACKET_CONFIGURATION_BITS);
- cl_u8(&vc4->bcl,
+ cl_u8(&bcl, VC4_PACKET_CONFIGURATION_BITS);
+ cl_u8(&bcl,
vc4->rasterizer->config_bits[0] |
vc4->zsa->config_bits[0]);
- cl_u8(&vc4->bcl,
+ cl_u8(&bcl,
vc4->rasterizer->config_bits[1] |
vc4->zsa->config_bits[1]);
- cl_u8(&vc4->bcl,
+ cl_u8(&bcl,
vc4->rasterizer->config_bits[2] |
vc4->zsa->config_bits[2]);
}
if (vc4->dirty & VC4_DIRTY_RASTERIZER) {
- cl_u8(&vc4->bcl, VC4_PACKET_DEPTH_OFFSET);
- cl_u16(&vc4->bcl, vc4->rasterizer->offset_factor);
- cl_u16(&vc4->bcl, vc4->rasterizer->offset_units);
+ cl_u8(&bcl, VC4_PACKET_DEPTH_OFFSET);
+ cl_u16(&bcl, vc4->rasterizer->offset_factor);
+ cl_u16(&bcl, vc4->rasterizer->offset_units);
- cl_u8(&vc4->bcl, VC4_PACKET_POINT_SIZE);
- cl_f(&vc4->bcl, vc4->rasterizer->point_size);
+ cl_u8(&bcl, VC4_PACKET_POINT_SIZE);
+ cl_f(&bcl, vc4->rasterizer->point_size);
- cl_u8(&vc4->bcl, VC4_PACKET_LINE_WIDTH);
- cl_f(&vc4->bcl, vc4->rasterizer->base.line_width);
+ cl_u8(&bcl, VC4_PACKET_LINE_WIDTH);
+ cl_f(&bcl, vc4->rasterizer->base.line_width);
}
if (vc4->dirty & VC4_DIRTY_VIEWPORT) {
- cl_u8(&vc4->bcl, VC4_PACKET_CLIPPER_XY_SCALING);
- cl_f(&vc4->bcl, vc4->viewport.scale[0] * 16.0f);
- cl_f(&vc4->bcl, vc4->viewport.scale[1] * 16.0f);
+ cl_u8(&bcl, VC4_PACKET_CLIPPER_XY_SCALING);
+ cl_f(&bcl, vc4->viewport.scale[0] * 16.0f);
+ cl_f(&bcl, vc4->viewport.scale[1] * 16.0f);
- cl_u8(&vc4->bcl, VC4_PACKET_CLIPPER_Z_SCALING);
- cl_f(&vc4->bcl, vc4->viewport.translate[2]);
- cl_f(&vc4->bcl, vc4->viewport.scale[2]);
+ cl_u8(&bcl, VC4_PACKET_CLIPPER_Z_SCALING);
+ cl_f(&bcl, vc4->viewport.translate[2]);
+ cl_f(&bcl, vc4->viewport.scale[2]);
- cl_u8(&vc4->bcl, VC4_PACKET_VIEWPORT_OFFSET);
- cl_u16(&vc4->bcl, 16 * vc4->viewport.translate[0]);
- cl_u16(&vc4->bcl, 16 * vc4->viewport.translate[1]);
+ cl_u8(&bcl, VC4_PACKET_VIEWPORT_OFFSET);
+ cl_u16(&bcl, 16 * vc4->viewport.translate[0]);
+ cl_u16(&bcl, 16 * vc4->viewport.translate[1]);
}
if (vc4->dirty & VC4_DIRTY_FLAT_SHADE_FLAGS) {
- cl_u8(&vc4->bcl, VC4_PACKET_FLAT_SHADE_FLAGS);
- cl_u32(&vc4->bcl, vc4->rasterizer->base.flatshade ?
+ cl_u8(&bcl, VC4_PACKET_FLAT_SHADE_FLAGS);
+ cl_u32(&bcl, vc4->rasterizer->base.flatshade ?
vc4->prog.fs->color_inputs : 0);
}
+
+ cl_end(&vc4->bcl, bcl);
}
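
The mechanical change running through vc4_emit.c (and vc4_draw.c above)
is the cached-pointer CL API: take the write cursor once with
cl_start(), emit through the local copy, and publish it back with
cl_end(), instead of re-reading vc4->bcl on every packet byte. A
reduced analog of the pattern (simplified types; the real helpers in
vc4_cl.h also handle relocations and buffer growth):

    #include <stdint.h>

    struct cl { char *base, *next, *end; };

    /* Hand out the current cursor; callers write through a local. */
    static inline char *cl_start(struct cl *cl) { return cl->next; }

    /* Each emit advances only the cached cursor. */
    static inline void cl_u8(char **cursor, uint8_t v) { *(*cursor)++ = (char)v; }

    /* Store the final cursor back into the CL exactly once. */
    static inline void cl_end(struct cl *cl, char *cursor) { cl->next = cursor; }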
diff --git a/src/gallium/drivers/vc4/vc4_fence.c b/src/gallium/drivers/vc4/vc4_fence.c
index f2ee91de61a..b6fb2a8a460 100644
--- a/src/gallium/drivers/vc4/vc4_fence.c
+++ b/src/gallium/drivers/vc4/vc4_fence.c
@@ -60,16 +60,6 @@ vc4_fence_reference(struct pipe_screen *pscreen,
}
static boolean
-vc4_fence_signalled(struct pipe_screen *pscreen,
- struct pipe_fence_handle *pf)
-{
- struct vc4_screen *screen = vc4_screen(pscreen);
- struct vc4_fence *f = (struct vc4_fence *)pf;
-
- return vc4_wait_seqno(screen, f->seqno, 0);
-}
-
-static boolean
vc4_fence_finish(struct pipe_screen *pscreen,
struct pipe_fence_handle *pf,
uint64_t timeout_ns)
@@ -77,7 +67,7 @@ vc4_fence_finish(struct pipe_screen *pscreen,
struct vc4_screen *screen = vc4_screen(pscreen);
struct vc4_fence *f = (struct vc4_fence *)pf;
- return vc4_wait_seqno(screen, f->seqno, timeout_ns);
+ return vc4_wait_seqno(screen, f->seqno, timeout_ns, "fence wait");
}
struct vc4_fence *
@@ -98,6 +88,5 @@ void
vc4_fence_init(struct vc4_screen *screen)
{
screen->base.fence_reference = vc4_fence_reference;
- screen->base.fence_signalled = vc4_fence_signalled;
screen->base.fence_finish = vc4_fence_finish;
}
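
Dropping fence_signalled loses nothing: gallium treats fence_finish
with a zero timeout as a non-blocking poll, so the old behavior is
still available as, schematically:

    /* Poll without blocking: true iff the fence has already signalled. */
    boolean done = screen->base.fence_finish(&screen->base, fence, 0);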
diff --git a/src/gallium/drivers/vc4/vc4_formats.c b/src/gallium/drivers/vc4/vc4_formats.c
index 004bac70c67..ffce61237de 100644
--- a/src/gallium/drivers/vc4/vc4_formats.c
+++ b/src/gallium/drivers/vc4/vc4_formats.c
@@ -108,7 +108,7 @@ static const struct vc4_format vc4_format_table[] = {
static const struct vc4_format *
get_format(enum pipe_format f)
{
- if (f > ARRAY_SIZE(vc4_format_table) ||
+ if (f >= ARRAY_SIZE(vc4_format_table) ||
!vc4_format_table[f].present)
return NULL;
else
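
The one-character fix above closes a classic off-by-one: a table of N
entries has valid indices 0..N-1, so the reject test must be >=, not >.
A self-contained illustration of the same bounds check:

    #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

    static const int table[4] = { 10, 20, 30, 40 };

    static int
    lookup(unsigned i)
    {
            /* With "i > ARRAY_SIZE(table)", i == 4 would slip through
             * and read one element past the end of the array.
             */
            if (i >= ARRAY_SIZE(table))
                    return -1;
            return table[i];
    }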
diff --git a/src/gallium/drivers/vc4/vc4_job.c b/src/gallium/drivers/vc4/vc4_job.c
index dcade15443a..7ebd9f160eb 100644
--- a/src/gallium/drivers/vc4/vc4_job.c
+++ b/src/gallium/drivers/vc4/vc4_job.c
@@ -44,8 +44,7 @@ void
vc4_job_reset(struct vc4_context *vc4)
{
struct vc4_bo **referenced_bos = vc4->bo_pointers.base;
- for (int i = 0; i < (vc4->bo_handles.next -
- vc4->bo_handles.base) / 4; i++) {
+ for (int i = 0; i < cl_offset(&vc4->bo_handles) / 4; i++) {
vc4_bo_unreference(&referenced_bos[i]);
}
vc4_reset_cl(&vc4->bcl);
@@ -145,7 +144,7 @@ vc4_job_submit(struct vc4_context *vc4)
{
if (vc4_debug & VC4_DEBUG_CL) {
fprintf(stderr, "BCL:\n");
- vc4_dump_cl(vc4->bcl.base, vc4->bcl.next - vc4->bcl.base, false);
+ vc4_dump_cl(vc4->bcl.base, cl_offset(&vc4->bcl), false);
}
struct drm_vc4_submit_cl submit;
@@ -164,15 +163,14 @@ vc4_job_submit(struct vc4_context *vc4)
vc4->zs_write, true, true);
submit.bo_handles = (uintptr_t)vc4->bo_handles.base;
- submit.bo_handle_count = (vc4->bo_handles.next -
- vc4->bo_handles.base) / 4;
+ submit.bo_handle_count = cl_offset(&vc4->bo_handles) / 4;
submit.bin_cl = (uintptr_t)vc4->bcl.base;
- submit.bin_cl_size = vc4->bcl.next - vc4->bcl.base;
+ submit.bin_cl_size = cl_offset(&vc4->bcl);
submit.shader_rec = (uintptr_t)vc4->shader_rec.base;
- submit.shader_rec_size = vc4->shader_rec.next - vc4->shader_rec.base;
+ submit.shader_rec_size = cl_offset(&vc4->shader_rec);
submit.shader_rec_count = vc4->shader_rec_count;
submit.uniforms = (uintptr_t)vc4->uniforms.base;
- submit.uniforms_size = vc4->uniforms.next - vc4->uniforms.base;
+ submit.uniforms_size = cl_offset(&vc4->uniforms);
assert(vc4->draw_min_x != ~0 && vc4->draw_min_y != ~0);
submit.min_x_tile = vc4->draw_min_x / 64;
@@ -207,7 +205,7 @@ vc4_job_submit(struct vc4_context *vc4)
if (vc4_debug & VC4_DEBUG_ALWAYS_SYNC) {
if (!vc4_wait_seqno(vc4->screen, vc4->last_emit_seqno,
- PIPE_TIMEOUT_INFINITE)) {
+ PIPE_TIMEOUT_INFINITE, "sync")) {
fprintf(stderr, "Wait failed.\n");
abort();
}
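
cl_offset() replaces the "next - base" subtraction that was previously
spelled out at every call site. Presumably it is just that arithmetic
behind a name; a sketch with a reduced struct (the real definition
lives in vc4_cl.h):

    #include <stdint.h>

    struct vc4_cl { void *base, *next; }; /* reduced for illustration */

    /* Bytes emitted into the command list so far. */
    static inline uint32_t
    cl_offset(struct vc4_cl *cl)
    {
            return (char *)cl->next - (char *)cl->base;
    }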
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
new file mode 100644
index 00000000000..a372a6c0cdc
--- /dev/null
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
@@ -0,0 +1,431 @@
+/*
+ * Copyright © 2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * Implements most of the fixed function fragment pipeline in shader code.
+ *
+ * VC4 doesn't have any hardware support for blending, alpha test, logic ops,
+ * or color mask. Instead, you read the current contents of the destination
+ * from the tile buffer after having waited for the scoreboard (which is
+ * handled by vc4_qpu_emit.c), then do math using your output color and that
+ * destination value, and update the output color appropriately.
+ *
+ * This pass lowers fixed-function blending to a load of the destination
+ * color and a series of ALU operations before the store of the output.
+ */
+#include "util/u_format.h"
+#include "vc4_qir.h"
+#include "glsl/nir/nir_builder.h"
+#include "vc4_context.h"
+
+/** Emits a load of the previous fragment color from the tile buffer. */
+static nir_ssa_def *
+vc4_nir_get_dst_color(nir_builder *b)
+{
+ nir_intrinsic_instr *load =
+ nir_intrinsic_instr_create(b->shader,
+ nir_intrinsic_load_input);
+ load->num_components = 1;
+ load->const_index[0] = VC4_NIR_TLB_COLOR_READ_INPUT;
+ nir_ssa_dest_init(&load->instr, &load->dest, 1, NULL);
+ nir_builder_instr_insert(b, &load->instr);
+ return &load->dest.ssa;
+}
+
+static nir_ssa_def *
+vc4_nir_srgb_decode(nir_builder *b, nir_ssa_def *srgb)
+{
+ nir_ssa_def *is_low = nir_flt(b, srgb, nir_imm_float(b, 0.04045));
+ nir_ssa_def *low = nir_fmul(b, srgb, nir_imm_float(b, 1.0 / 12.92));
+ nir_ssa_def *high = nir_fpow(b,
+ nir_fmul(b,
+ nir_fadd(b, srgb,
+ nir_imm_float(b, 0.055)),
+ nir_imm_float(b, 1.0 / 1.055)),
+ nir_imm_float(b, 2.4));
+
+ return nir_bcsel(b, is_low, low, high);
+}
+
+static nir_ssa_def *
+vc4_nir_srgb_encode(nir_builder *b, nir_ssa_def *linear)
+{
+ nir_ssa_def *is_low = nir_flt(b, linear, nir_imm_float(b, 0.0031308));
+ nir_ssa_def *low = nir_fmul(b, linear, nir_imm_float(b, 12.92));
+ nir_ssa_def *high = nir_fsub(b,
+ nir_fmul(b,
+ nir_imm_float(b, 1.055),
+ nir_fpow(b,
+ linear,
+ nir_imm_float(b, 0.41666))),
+ nir_imm_float(b, 0.055));
+
+ return nir_bcsel(b, is_low, low, high);
+}
+
+static nir_ssa_def *
+vc4_blend_channel(nir_builder *b,
+ nir_ssa_def **src,
+ nir_ssa_def **dst,
+ unsigned factor,
+ int channel)
+{
+ switch (factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ return nir_imm_float(b, 1.0);
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ return src[channel];
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ return src[3];
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ return dst[3];
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ return dst[channel];
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ if (channel != 3) {
+ return nir_fmin(b,
+ src[3],
+ nir_fsub(b,
+ nir_imm_float(b, 1.0),
+ dst[3]));
+ } else {
+ return nir_imm_float(b, 1.0);
+ }
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_X + channel);
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_W);
+ case PIPE_BLENDFACTOR_ZERO:
+ return nir_imm_float(b, 0.0);
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ return nir_fsub(b, nir_imm_float(b, 1.0), src[channel]);
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ return nir_fsub(b, nir_imm_float(b, 1.0), src[3]);
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ return nir_fsub(b, nir_imm_float(b, 1.0), dst[3]);
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ return nir_fsub(b, nir_imm_float(b, 1.0), dst[channel]);
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ return nir_fsub(b, nir_imm_float(b, 1.0),
+ vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_X + channel));
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ return nir_fsub(b, nir_imm_float(b, 1.0),
+ vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_W));
+
+ default:
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+ /* Unsupported. */
+ fprintf(stderr, "Unknown blend factor %d\n", factor);
+ return nir_imm_float(b, 1.0);
+ }
+}
+
+static nir_ssa_def *
+vc4_blend_func(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
+ unsigned func)
+{
+ switch (func) {
+ case PIPE_BLEND_ADD:
+ return nir_fadd(b, src, dst);
+ case PIPE_BLEND_SUBTRACT:
+ return nir_fsub(b, src, dst);
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ return nir_fsub(b, dst, src);
+ case PIPE_BLEND_MIN:
+ return nir_fmin(b, src, dst);
+ case PIPE_BLEND_MAX:
+ return nir_fmax(b, src, dst);
+
+ default:
+ /* Unsupported. */
+ fprintf(stderr, "Unknown blend func %d\n", func);
+ return src;
+
+ }
+}
+
+static void
+vc4_do_blending(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
+ nir_ssa_def **src_color, nir_ssa_def **dst_color)
+{
+ struct pipe_rt_blend_state *blend = &c->fs_key->blend;
+
+ if (!blend->blend_enable) {
+ for (int i = 0; i < 4; i++)
+ result[i] = src_color[i];
+ return;
+ }
+
+ /* Clamp the src color to [0, 1]. Dest is already clamped. */
+ for (int i = 0; i < 4; i++)
+ src_color[i] = nir_fsat(b, src_color[i]);
+
+ nir_ssa_def *src_blend[4], *dst_blend[4];
+ for (int i = 0; i < 4; i++) {
+ int src_factor = ((i != 3) ? blend->rgb_src_factor :
+ blend->alpha_src_factor);
+ int dst_factor = ((i != 3) ? blend->rgb_dst_factor :
+ blend->alpha_dst_factor);
+ src_blend[i] = nir_fmul(b, src_color[i],
+ vc4_blend_channel(b,
+ src_color, dst_color,
+ src_factor, i));
+ dst_blend[i] = nir_fmul(b, dst_color[i],
+ vc4_blend_channel(b,
+ src_color, dst_color,
+ dst_factor, i));
+ }
+
+ for (int i = 0; i < 4; i++) {
+ result[i] = vc4_blend_func(b, src_blend[i], dst_blend[i],
+ ((i != 3) ? blend->rgb_func :
+ blend->alpha_func));
+ }
+}
+
+static nir_ssa_def *
+vc4_logicop(nir_builder *b, int logicop_func,
+ nir_ssa_def *src, nir_ssa_def *dst)
+{
+ switch (logicop_func) {
+ case PIPE_LOGICOP_CLEAR:
+ return nir_imm_int(b, 0);
+ case PIPE_LOGICOP_NOR:
+ return nir_inot(b, nir_ior(b, src, dst));
+ case PIPE_LOGICOP_AND_INVERTED:
+ return nir_iand(b, nir_inot(b, src), dst);
+ case PIPE_LOGICOP_COPY_INVERTED:
+ return nir_inot(b, src);
+ case PIPE_LOGICOP_AND_REVERSE:
+ return nir_iand(b, src, nir_inot(b, dst));
+ case PIPE_LOGICOP_INVERT:
+ return nir_inot(b, dst);
+ case PIPE_LOGICOP_XOR:
+ return nir_ixor(b, src, dst);
+ case PIPE_LOGICOP_NAND:
+ return nir_inot(b, nir_iand(b, src, dst));
+ case PIPE_LOGICOP_AND:
+ return nir_iand(b, src, dst);
+ case PIPE_LOGICOP_EQUIV:
+ return nir_inot(b, nir_ixor(b, src, dst));
+ case PIPE_LOGICOP_NOOP:
+ return dst;
+ case PIPE_LOGICOP_OR_INVERTED:
+ return nir_ior(b, nir_inot(b, src), dst);
+ case PIPE_LOGICOP_OR_REVERSE:
+ return nir_ior(b, src, nir_inot(b, dst));
+ case PIPE_LOGICOP_OR:
+ return nir_ior(b, src, dst);
+ case PIPE_LOGICOP_SET:
+ return nir_imm_int(b, ~0);
+ default:
+ fprintf(stderr, "Unknown logic op %d\n", logicop_func);
+ /* FALLTHROUGH */
+ case PIPE_LOGICOP_COPY:
+ return src;
+ }
+}
+
+static nir_ssa_def *
+vc4_nir_pipe_compare_func(nir_builder *b, int func,
+ nir_ssa_def *src0, nir_ssa_def *src1)
+{
+ switch (func) {
+ default:
+ fprintf(stderr, "Unknown compare func %d\n", func);
+ /* FALLTHROUGH */
+ case PIPE_FUNC_NEVER:
+ return nir_imm_int(b, 0);
+ case PIPE_FUNC_ALWAYS:
+ return nir_imm_int(b, ~0);
+ case PIPE_FUNC_EQUAL:
+ return nir_feq(b, src0, src1);
+ case PIPE_FUNC_NOTEQUAL:
+ return nir_fne(b, src0, src1);
+ case PIPE_FUNC_GREATER:
+ return nir_flt(b, src1, src0);
+ case PIPE_FUNC_GEQUAL:
+ return nir_fge(b, src0, src1);
+ case PIPE_FUNC_LESS:
+ return nir_flt(b, src0, src1);
+ case PIPE_FUNC_LEQUAL:
+ return nir_fge(b, src1, src0);
+ }
+}
+
+static void
+vc4_nir_emit_alpha_test_discard(struct vc4_compile *c, nir_builder *b,
+ nir_ssa_def *alpha)
+{
+ if (!c->fs_key->alpha_test)
+ return;
+
+ nir_ssa_def *alpha_ref =
+ vc4_nir_get_state_uniform(b, QUNIFORM_ALPHA_REF);
+ nir_ssa_def *condition =
+ vc4_nir_pipe_compare_func(b, c->fs_key->alpha_test_func,
+ alpha, alpha_ref);
+
+ nir_intrinsic_instr *discard =
+ nir_intrinsic_instr_create(b->shader,
+ nir_intrinsic_discard_if);
+ discard->num_components = 1;
+ discard->src[0] = nir_src_for_ssa(nir_inot(b, condition));
+ nir_builder_instr_insert(b, &discard->instr);
+}
+
+static void
+vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
+ nir_intrinsic_instr *intr)
+{
+ enum pipe_format color_format = c->fs_key->color_format;
+ const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
+
+ /* Pull out the float src/dst color components. */
+ nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b);
+ nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color);
+ nir_ssa_def *src_color[4], *unpacked_dst_color[4];
+ for (unsigned i = 0; i < 4; i++) {
+ src_color[i] = nir_swizzle(b, intr->src[0].ssa, &i, 1, false);
+ unpacked_dst_color[i] = nir_swizzle(b, dst_vec4, &i, 1, false);
+ }
+
+ /* Unswizzle the destination color. */
+ nir_ssa_def *dst_color[4];
+ for (unsigned i = 0; i < 4; i++) {
+ dst_color[i] = vc4_nir_get_swizzled_channel(b,
+ unpacked_dst_color,
+ format_swiz[i]);
+ }
+
+ vc4_nir_emit_alpha_test_discard(c, b, src_color[3]);
+
+ /* Turn dst color to linear. */
+ if (util_format_is_srgb(color_format)) {
+ for (int i = 0; i < 3; i++)
+ dst_color[i] = vc4_nir_srgb_decode(b, dst_color[i]);
+ }
+
+ nir_ssa_def *blend_color[4];
+ vc4_do_blending(c, b, blend_color, src_color, dst_color);
+
+ /* sRGB encode the output color */
+ if (util_format_is_srgb(color_format)) {
+ for (int i = 0; i < 3; i++)
+ blend_color[i] = vc4_nir_srgb_encode(b, blend_color[i]);
+ }
+
+ nir_ssa_def *swizzled_outputs[4];
+ for (int i = 0; i < 4; i++) {
+ swizzled_outputs[i] =
+ vc4_nir_get_swizzled_channel(b, blend_color,
+ format_swiz[i]);
+ }
+
+ nir_ssa_def *packed_color =
+ nir_pack_unorm_4x8(b,
+ nir_vec4(b,
+ swizzled_outputs[0],
+ swizzled_outputs[1],
+ swizzled_outputs[2],
+ swizzled_outputs[3]));
+
+ packed_color = vc4_logicop(b, c->fs_key->logicop_func,
+ packed_color, packed_dst_color);
+
+ /* If a channel's bit isn't set in the color mask, return the
+ * original dst color for that channel instead.
+ */
+ uint32_t colormask = 0xffffffff;
+ for (int i = 0; i < 4; i++) {
+ if (format_swiz[i] < 4 &&
+ !(c->fs_key->blend.colormask & (1 << format_swiz[i]))) {
+ colormask &= ~(0xff << (i * 8));
+ }
+ }
+ packed_color = nir_ior(b,
+ nir_iand(b, packed_color,
+ nir_imm_int(b, colormask)),
+ nir_iand(b, packed_dst_color,
+ nir_imm_int(b, ~colormask)));
+
+ /* Turn the old vec4 output into a store of the packed color. */
+ nir_instr_rewrite_src(&intr->instr, &intr->src[0],
+ nir_src_for_ssa(packed_color));
+ intr->num_components = 1;
+}
+
+static bool
+vc4_nir_lower_blend_block(nir_block *block, void *state)
+{
+ struct vc4_compile *c = state;
+
+ nir_foreach_instr(block, instr) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ if (intr->intrinsic != nir_intrinsic_store_output)
+ continue;
+
+ nir_variable *output_var = NULL;
+ foreach_list_typed(nir_variable, var, node, &c->s->outputs) {
+ if (var->data.driver_location == intr->const_index[0]) {
+ output_var = var;
+ break;
+ }
+ }
+ assert(output_var);
+ unsigned semantic_name = output_var->data.location;
+
+ if (semantic_name != TGSI_SEMANTIC_COLOR)
+ continue;
+
+ nir_function_impl *impl =
+ nir_cf_node_get_function(&block->cf_node);
+ nir_builder b;
+ nir_builder_init(&b, impl);
+ nir_builder_insert_before_instr(&b, &intr->instr);
+ vc4_nir_lower_blend_instr(c, &b, intr);
+ }
+ return true;
+}
+
+void
+vc4_nir_lower_blend(struct vc4_compile *c)
+{
+ nir_foreach_overload(c->s, overload) {
+ if (overload->impl) {
+ nir_foreach_block(overload->impl,
+ vc4_nir_lower_blend_block, c);
+
+ nir_metadata_preserve(overload->impl,
+ nir_metadata_block_index |
+ nir_metadata_dominance);
+ }
+ }
+}
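
For reference, vc4_nir_srgb_decode()/encode() above implement the
standard sRGB transfer functions; restating the shader math (0.41666
approximates 1/2.4):

    decode:  linear = srgb / 12.92                    if srgb <= 0.04045
             linear = ((srgb + 0.055) / 1.055)^2.4    otherwise
    encode:  srgb   = 12.92 * linear                  if linear < 0.0031308
             srgb   = 1.055 * linear^(1/2.4) - 0.055  otherwise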
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
new file mode 100644
index 00000000000..229d41147d8
--- /dev/null
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
@@ -0,0 +1,291 @@
+/*
+ * Copyright © 2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "vc4_qir.h"
+#include "tgsi/tgsi_info.h"
+#include "glsl/nir/nir_builder.h"
+
+/**
+ * Walks the NIR generated by TGSI-to-NIR to lower its io intrinsics into
+ * something amenable to the VC4 architecture.
+ *
+ * Currently, it splits inputs, outputs, and uniforms into scalars, drops any
+ * non-position outputs in coordinate shaders, and fixes up the addressing on
+ * indirect uniform loads.
+ */
+
+static void
+replace_intrinsic_with_vec4(nir_builder *b, nir_intrinsic_instr *intr,
+ nir_ssa_def **comps)
+{
+ /* Batch things back together into a vec4. This will get split by the
+ * later ALU scalarization pass.
+ */
+ nir_ssa_def *vec = nir_vec4(b, comps[0], comps[1], comps[2], comps[3]);
+
+ /* Replace the old intrinsic with a reference to our reconstructed
+ * vec4.
+ */
+ nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(vec),
+ ralloc_parent(b->impl));
+ nir_instr_remove(&intr->instr);
+}
+
+static void
+vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b,
+ nir_intrinsic_instr *intr)
+{
+ nir_builder_insert_before_instr(b, &intr->instr);
+
+ if (c->stage == QSTAGE_FRAG && intr->const_index[0] ==
+ VC4_NIR_TLB_COLOR_READ_INPUT) {
+ /* This doesn't need any lowering. */
+ return;
+ }
+
+ nir_variable *input_var = NULL;
+ foreach_list_typed(nir_variable, var, node, &c->s->inputs) {
+ if (var->data.driver_location == intr->const_index[0]) {
+ input_var = var;
+ break;
+ }
+ }
+ assert(input_var);
+ int semantic_name = input_var->data.location;
+ int semantic_index = input_var->data.index;
+
+ /* All TGSI-to-NIR inputs are vec4. */
+ assert(intr->num_components == 4);
+
+ /* Generate scalar loads equivalent to the original VEC4. */
+ nir_ssa_def *dests[4];
+ for (unsigned i = 0; i < intr->num_components; i++) {
+ nir_intrinsic_instr *intr_comp =
+ nir_intrinsic_instr_create(c->s, nir_intrinsic_load_input);
+ intr_comp->num_components = 1;
+ intr_comp->const_index[0] = intr->const_index[0] * 4 + i;
+ nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL);
+ nir_builder_instr_insert(b, &intr_comp->instr);
+
+ dests[i] = &intr_comp->dest.ssa;
+ }
+
+ switch (c->stage) {
+ case QSTAGE_FRAG:
+ switch (semantic_name) {
+ case TGSI_SEMANTIC_FACE:
+ dests[0] = nir_fsub(b,
+ nir_imm_float(b, 1.0),
+ nir_fmul(b,
+ nir_i2f(b, dests[0]),
+ nir_imm_float(b, 2.0)));
+ dests[1] = nir_imm_float(b, 0.0);
+ dests[2] = nir_imm_float(b, 0.0);
+ dests[3] = nir_imm_float(b, 1.0);
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ if (c->fs_key->point_sprite_mask &
+ (1 << semantic_index)) {
+ if (!c->fs_key->is_points) {
+ dests[0] = nir_imm_float(b, 0.0);
+ dests[1] = nir_imm_float(b, 0.0);
+ }
+ if (c->fs_key->point_coord_upper_left) {
+ dests[1] = nir_fsub(b,
+ nir_imm_float(b, 1.0),
+ dests[1]);
+ }
+ dests[2] = nir_imm_float(b, 0.0);
+ dests[3] = nir_imm_float(b, 1.0);
+ }
+ break;
+ }
+ break;
+ case QSTAGE_COORD:
+ case QSTAGE_VERT:
+ break;
+ }
+
+ replace_intrinsic_with_vec4(b, intr, dests);
+}
+
+static void
+vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b,
+ nir_intrinsic_instr *intr)
+{
+ nir_variable *output_var = NULL;
+ foreach_list_typed(nir_variable, var, node, &c->s->outputs) {
+ if (var->data.driver_location == intr->const_index[0]) {
+ output_var = var;
+ break;
+ }
+ }
+ assert(output_var);
+ unsigned semantic_name = output_var->data.location;
+
+ if (c->stage == QSTAGE_COORD &&
+ (semantic_name != TGSI_SEMANTIC_POSITION &&
+ semantic_name != TGSI_SEMANTIC_PSIZE)) {
+ nir_instr_remove(&intr->instr);
+ return;
+ }
+
+ /* Color output is lowered by vc4_nir_lower_blend(). */
+ if (c->stage == QSTAGE_FRAG && semantic_name == TGSI_SEMANTIC_COLOR) {
+ intr->const_index[0] *= 4;
+ return;
+ }
+
+ /* All TGSI-to-NIR outputs are VEC4. */
+ assert(intr->num_components == 4);
+
+ nir_builder_insert_before_instr(b, &intr->instr);
+
+ for (unsigned i = 0; i < intr->num_components; i++) {
+ nir_intrinsic_instr *intr_comp =
+ nir_intrinsic_instr_create(c->s, nir_intrinsic_store_output);
+ intr_comp->num_components = 1;
+ intr_comp->const_index[0] = intr->const_index[0] * 4 + i;
+
+ assert(intr->src[0].is_ssa);
+ intr_comp->src[0] = nir_src_for_ssa(nir_swizzle(b,
+ intr->src[0].ssa,
+ &i, 1, false));
+ nir_builder_instr_insert(b, &intr_comp->instr);
+ }
+
+ nir_instr_remove(&intr->instr);
+}
+
+static void
+vc4_nir_lower_uniform(struct vc4_compile *c, nir_builder *b,
+ nir_intrinsic_instr *intr)
+{
+ /* All TGSI-to-NIR uniform loads are vec4, but we may create dword
+ * loads in our lowering passes.
+ */
+ if (intr->num_components == 1)
+ return;
+ assert(intr->num_components == 4);
+
+ nir_builder_insert_before_instr(b, &intr->instr);
+
+ /* Generate scalar loads equivalent to the original VEC4. */
+ nir_ssa_def *dests[4];
+ for (unsigned i = 0; i < intr->num_components; i++) {
+ nir_intrinsic_instr *intr_comp =
+ nir_intrinsic_instr_create(c->s, intr->intrinsic);
+ intr_comp->num_components = 1;
+ nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL);
+
+ if (intr->intrinsic == nir_intrinsic_load_uniform_indirect) {
+ /* Convert the variable TGSI register index to a byte
+ * offset.
+ */
+ intr_comp->src[0] =
+ nir_src_for_ssa(nir_ishl(b,
+ intr->src[0].ssa,
+ nir_imm_int(b, 4)));
+
+ /* Convert the offset to be a byte index, too. */
+ intr_comp->const_index[0] = (intr->const_index[0] * 16 +
+ i * 4);
+ } else {
+ /* We want a dword index for non-indirect uniform
+ * loads.
+ */
+ intr_comp->const_index[0] = (intr->const_index[0] * 4 +
+ i);
+ }
+
+ dests[i] = &intr_comp->dest.ssa;
+
+ nir_builder_instr_insert(b, &intr_comp->instr);
+ }
+
+ replace_intrinsic_with_vec4(b, intr, dests);
+}
+
+static void
+vc4_nir_lower_io_instr(struct vc4_compile *c, nir_builder *b,
+ struct nir_instr *instr)
+{
+ if (instr->type != nir_instr_type_intrinsic)
+ return;
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+ switch (intr->intrinsic) {
+ case nir_intrinsic_load_input:
+ vc4_nir_lower_input(c, b, intr);
+ break;
+
+ case nir_intrinsic_store_output:
+ vc4_nir_lower_output(c, b, intr);
+ break;
+
+ case nir_intrinsic_load_uniform:
+ case nir_intrinsic_load_uniform_indirect:
+ vc4_nir_lower_uniform(c, b, intr);
+ break;
+
+ default:
+ break;
+ }
+}
+
+static bool
+vc4_nir_lower_io_block(nir_block *block, void *arg)
+{
+ struct vc4_compile *c = arg;
+ nir_function_impl *impl =
+ nir_cf_node_get_function(&block->cf_node);
+
+ nir_builder b;
+ nir_builder_init(&b, impl);
+
+ nir_foreach_instr_safe(block, instr)
+ vc4_nir_lower_io_instr(c, &b, instr);
+
+ return true;
+}
+
+static bool
+vc4_nir_lower_io_impl(struct vc4_compile *c, nir_function_impl *impl)
+{
+ nir_foreach_block(impl, vc4_nir_lower_io_block, c);
+
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+
+ return true;
+}
+
+void
+vc4_nir_lower_io(struct vc4_compile *c)
+{
+ nir_foreach_overload(c->s, overload) {
+ if (overload->impl)
+ vc4_nir_lower_io_impl(c, overload->impl);
+ }
+}
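
Worked example of the addressing in vc4_nir_lower_uniform(): each TGSI
vec4 register occupies 16 bytes, so the indirect register index is
shifted left by 4 and the constant part becomes base * 16 + component * 4:

    /* Byte offset of one scalar component of an indirectly addressed
     * uniform; e.g. component 2 of base register 3 is at 3*16 + 2*4 = 56,
     * plus 16 for each step of the indirect index.
     */
    byte_offset = (indirect_index << 4) + base * 16 + component * 4;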
diff --git a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
index d6d2fbf257f..a755de9aa41 100644
--- a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
+++ b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
@@ -67,10 +67,7 @@ qir_opt_copy_propagation(struct vc4_compile *c)
if (inst->op == QOP_MOV &&
inst->dst.file == QFILE_TEMP &&
- inst->src[0].file != QFILE_VPM &&
- !(inst->src[0].file == QFILE_TEMP &&
- (c->defs[inst->src[0].index]->op == QOP_TEX_RESULT ||
- c->defs[inst->src[0].index]->op == QOP_TLB_COLOR_READ))) {
+ inst->src[0].file != QFILE_VPM) {
movs[inst->dst.index] = inst->src[0];
}
}
diff --git a/src/gallium/drivers/vc4/vc4_opt_cse.c b/src/gallium/drivers/vc4/vc4_opt_cse.c
index 92c8260eb59..0e5480ea781 100644
--- a/src/gallium/drivers/vc4/vc4_opt_cse.c
+++ b/src/gallium/drivers/vc4/vc4_opt_cse.c
@@ -46,8 +46,7 @@ struct inst_key {
struct qreg src[4];
/**
* If the instruction depends on the flags, how many SFs have been
- * seen before this instruction, or if it depends on r4, how many r4
- * writes have been seen.
+ * seen before this instruction.
*/
uint32_t implicit_arg_update_count;
};
@@ -63,8 +62,7 @@ inst_key_equals(const void *a, const void *b)
static struct qinst *
vc4_find_cse(struct vc4_compile *c, struct hash_table *ht,
- struct qinst *inst, uint32_t sf_count,
- uint32_t r4_count)
+ struct qinst *inst, uint32_t sf_count)
{
if (inst->dst.file != QFILE_TEMP ||
inst->op == QOP_MOV ||
@@ -79,8 +77,6 @@ vc4_find_cse(struct vc4_compile *c, struct hash_table *ht,
qir_get_op_nsrc(inst->op) * sizeof(key.src[0]));
if (qir_depends_on_flags(inst))
key.implicit_arg_update_count = sf_count;
- if (qir_reads_r4(inst))
- key.implicit_arg_update_count = r4_count;
uint32_t hash = _mesa_hash_data(&key, sizeof(key));
struct hash_entry *entry =
@@ -121,7 +117,7 @@ bool
qir_opt_cse(struct vc4_compile *c)
{
bool progress = false;
- uint32_t sf_count = 0, r4_count = 0;
+ uint32_t sf_count = 0;
struct hash_table *ht = _mesa_hash_table_create(NULL, NULL,
inst_key_equals);
@@ -130,15 +126,15 @@ qir_opt_cse(struct vc4_compile *c)
list_for_each_entry(struct qinst, inst, &c->instructions, link) {
if (qir_has_side_effects(c, inst) ||
- qir_has_side_effect_reads(c, inst)) {
+ qir_has_side_effect_reads(c, inst) ||
+ inst->op == QOP_TLB_COLOR_READ) {
continue;
}
if (inst->sf) {
sf_count++;
} else {
- struct qinst *cse = vc4_find_cse(c, ht, inst,
- sf_count, r4_count);
+ struct qinst *cse = vc4_find_cse(c, ht, inst, sf_count);
if (cse) {
inst->src[0] = cse->dst;
for (int i = 1; i < qir_get_op_nsrc(inst->op);
@@ -154,9 +150,6 @@ qir_opt_cse(struct vc4_compile *c)
}
}
}
-
- if (qir_writes_r4(inst))
- r4_count++;
}
ralloc_free(ht);
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index ba47c51d9bd..13c472152d8 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -23,21 +23,19 @@
*/
#include <inttypes.h>
-#include "pipe/p_state.h"
#include "util/u_format.h"
#include "util/u_hash.h"
#include "util/u_math.h"
#include "util/u_memory.h"
-#include "util/u_pack_color.h"
-#include "util/format_srgb.h"
#include "util/ralloc.h"
#include "util/hash_table.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_lowering.h"
#include "tgsi/tgsi_parse.h"
+#include "glsl/nir/nir.h"
+#include "glsl/nir/nir_builder.h"
#include "nir/tgsi_to_nir.h"
-
#include "vc4_context.h"
#include "vc4_qpu.h"
#include "vc4_qir.h"
@@ -45,51 +43,8 @@
#include "simpenrose/simpenrose.h"
#endif
-struct vc4_key {
- struct vc4_uncompiled_shader *shader_state;
- struct {
- enum pipe_format format;
- unsigned compare_mode:1;
- unsigned compare_func:3;
- unsigned wrap_s:3;
- unsigned wrap_t:3;
- uint8_t swizzle[4];
- } tex[VC4_MAX_TEXTURE_SAMPLERS];
- uint8_t ucp_enables;
-};
-
-struct vc4_fs_key {
- struct vc4_key base;
- enum pipe_format color_format;
- bool depth_enabled;
- bool stencil_enabled;
- bool stencil_twoside;
- bool stencil_full_writemasks;
- bool is_points;
- bool is_lines;
- bool alpha_test;
- bool point_coord_upper_left;
- bool light_twoside;
- uint8_t alpha_test_func;
- uint8_t logicop_func;
- uint32_t point_sprite_mask;
-
- struct pipe_rt_blend_state blend;
-};
-
-struct vc4_vs_key {
- struct vc4_key base;
-
- /**
- * This is a proxy for the array of FS input semantics, which is
- * larger than we would want to put in the key.
- */
- uint64_t compiled_fs_id;
-
- enum pipe_format attr_formats[8];
- bool is_coord;
- bool per_vertex_point_size;
-};
+static struct qreg
+ntq_get_src(struct vc4_compile *c, nir_src src, int i);
static void
resize_qreg_array(struct vc4_compile *c,
@@ -113,10 +68,10 @@ resize_qreg_array(struct vc4_compile *c,
}
static struct qreg
-indirect_uniform_load(struct vc4_compile *c,
- struct qreg indirect_offset,
- unsigned offset)
+indirect_uniform_load(struct vc4_compile *c, nir_intrinsic_instr *intr)
{
+ struct qreg indirect_offset = ntq_get_src(c, intr->src[0], 0);
+ uint32_t offset = intr->const_index[0];
struct vc4_compiler_ubo_range *range = NULL;
unsigned i;
for (i = 0; i < c->num_uniform_ranges; i++) {
@@ -138,10 +93,6 @@ indirect_uniform_load(struct vc4_compile *c,
};
offset -= range->src_offset;
- /* Translate the user's TGSI register index from the TGSI register
- * base to a byte offset.
- */
- indirect_offset = qir_SHL(c, indirect_offset, qir_uniform_ui(c, 4));
/* Adjust for where we stored the TGSI register base. */
indirect_offset = qir_ADD(c, indirect_offset,
@@ -155,24 +106,70 @@ indirect_uniform_load(struct vc4_compile *c,
range->size - 4)));
qir_TEX_DIRECT(c, indirect_offset, qir_uniform(c, QUNIFORM_UBO_ADDR, 0));
- struct qreg r4 = qir_TEX_RESULT(c);
c->num_texture_samples++;
- return qir_MOV(c, r4);
+ return qir_TEX_RESULT(c);
}
-static struct qreg *
-ntq_get_dest(struct vc4_compile *c, nir_dest dest)
+nir_ssa_def *vc4_nir_get_state_uniform(struct nir_builder *b,
+ enum quniform_contents contents)
{
- assert(!dest.is_ssa);
- nir_register *reg = dest.reg.reg;
- struct hash_entry *entry = _mesa_hash_table_search(c->def_ht, reg);
- assert(reg->num_array_elems == 0);
- assert(dest.reg.base_offset == 0);
+ nir_intrinsic_instr *intr =
+ nir_intrinsic_instr_create(b->shader,
+ nir_intrinsic_load_uniform);
+ intr->const_index[0] = VC4_NIR_STATE_UNIFORM_OFFSET + contents;
+ intr->num_components = 1;
+ nir_ssa_dest_init(&intr->instr, &intr->dest, 1, NULL);
+ nir_builder_instr_insert(b, &intr->instr);
+ return &intr->dest.ssa;
+}
- struct qreg *qregs = entry->data;
+nir_ssa_def *
+vc4_nir_get_swizzled_channel(nir_builder *b, nir_ssa_def **srcs, int swiz)
+{
+ switch (swiz) {
+ default:
+ case UTIL_FORMAT_SWIZZLE_NONE:
+ fprintf(stderr, "warning: unknown swizzle\n");
+ /* FALLTHROUGH */
+ case UTIL_FORMAT_SWIZZLE_0:
+ return nir_imm_float(b, 0.0);
+ case UTIL_FORMAT_SWIZZLE_1:
+ return nir_imm_float(b, 1.0);
+ case UTIL_FORMAT_SWIZZLE_X:
+ case UTIL_FORMAT_SWIZZLE_Y:
+ case UTIL_FORMAT_SWIZZLE_Z:
+ case UTIL_FORMAT_SWIZZLE_W:
+ return srcs[swiz];
+ }
+}
+
+static struct qreg *
+ntq_init_ssa_def(struct vc4_compile *c, nir_ssa_def *def)
+{
+ struct qreg *qregs = ralloc_array(c->def_ht, struct qreg,
+ def->num_components);
+ _mesa_hash_table_insert(c->def_ht, def, qregs);
return qregs;
}
+static struct qreg *
+ntq_get_dest(struct vc4_compile *c, nir_dest *dest)
+{
+ if (dest->is_ssa) {
+ struct qreg *qregs = ntq_init_ssa_def(c, &dest->ssa);
+ for (int i = 0; i < dest->ssa.num_components; i++)
+ qregs[i] = c->undef;
+ return qregs;
+ } else {
+ nir_register *reg = dest->reg.reg;
+ assert(dest->reg.base_offset == 0);
+ assert(reg->num_array_elems == 0);
+ struct hash_entry *entry =
+ _mesa_hash_table_search(c->def_ht, reg);
+ return entry->data;
+ }
+}
+
static struct qreg
ntq_get_src(struct vc4_compile *c, nir_src src, int i)
{
@@ -282,22 +279,6 @@ qir_srgb_decode(struct vc4_compile *c, struct qreg srgb)
}
static struct qreg
-qir_srgb_encode(struct vc4_compile *c, struct qreg linear)
-{
- struct qreg low = qir_FMUL(c, linear, qir_uniform_f(c, 12.92));
- struct qreg high = qir_FSUB(c,
- qir_FMUL(c,
- qir_uniform_f(c, 1.055),
- qir_POW(c,
- linear,
- qir_uniform_f(c, 0.41666))),
- qir_uniform_f(c, 0.055));
-
- qir_SF(c, qir_FSUB(c, linear, qir_uniform_f(c, 0.0031308)));
- return qir_SEL_X_Y_NS(c, low, high);
-}
-
-static struct qreg
ntq_umul(struct vc4_compile *c, struct qreg src0, struct qreg src1)
{
struct qreg src0_hi = qir_SHR(c, src0,
@@ -410,13 +391,13 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr)
qir_TEX_S(c, s, texture_u[next_texture_u++]);
c->num_texture_samples++;
- struct qreg r4 = qir_TEX_RESULT(c);
+ struct qreg tex = qir_TEX_RESULT(c);
enum pipe_format format = c->key->tex[unit].format;
struct qreg unpacked[4];
if (util_format_is_depth_or_stencil(format)) {
- struct qreg depthf = qir_ITOF(c, qir_SHR(c, r4,
+ struct qreg depthf = qir_ITOF(c, qir_SHR(c, tex,
qir_uniform_ui(c, 8)));
struct qreg normalized = qir_FMUL(c, depthf,
qir_uniform_f(c, 1.0f/0xffffff));
@@ -468,7 +449,7 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr)
unpacked[i] = depth_output;
} else {
for (int i = 0; i < 4; i++)
- unpacked[i] = qir_R4_UNPACK(c, r4, i);
+ unpacked[i] = qir_UNPACK_8_F(c, tex, i);
}
const uint8_t *format_swiz = vc4_get_format_swizzle(format);
@@ -484,7 +465,7 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr)
texture_output[i]);
}
- struct qreg *dest = ntq_get_dest(c, instr->dest);
+ struct qreg *dest = ntq_get_dest(c, &instr->dest);
for (int i = 0; i < 4; i++) {
dest[i] = get_swizzled_channel(c, texture_output,
c->key->tex[unit].swizzle[i]);
@@ -558,7 +539,7 @@ ntq_fsin(struct vc4_compile *c, struct qreg src)
struct qreg scaled_x =
qir_FMUL(c,
src,
- qir_uniform_f(c, 1.0f / (M_PI * 2.0f)));
+ qir_uniform_f(c, 1.0 / (M_PI * 2.0)));
struct qreg x = qir_FADD(c,
ntq_ffract(c, scaled_x),
@@ -756,26 +737,6 @@ emit_fragcoord_input(struct vc4_compile *c, int attr)
c->inputs[attr * 4 + 3] = qir_RCP(c, qir_FRAG_W(c));
}
-static void
-emit_point_coord_input(struct vc4_compile *c, int attr)
-{
- if (c->point_x.file == QFILE_NULL) {
- c->point_x = qir_uniform_f(c, 0.0);
- c->point_y = qir_uniform_f(c, 0.0);
- }
-
- c->inputs[attr * 4 + 0] = c->point_x;
- if (c->fs_key->point_coord_upper_left) {
- c->inputs[attr * 4 + 1] = qir_FSUB(c,
- qir_uniform_f(c, 1.0),
- c->point_y);
- } else {
- c->inputs[attr * 4 + 1] = c->point_y;
- }
- c->inputs[attr * 4 + 2] = qir_uniform_f(c, 0.0);
- c->inputs[attr * 4 + 3] = qir_uniform_f(c, 1.0);
-}
-
static struct qreg
emit_fragment_varying(struct vc4_compile *c, uint8_t semantic,
uint8_t index, uint8_t swizzle)
@@ -817,19 +778,6 @@ emit_fragment_input(struct vc4_compile *c, int attr,
}
static void
-emit_face_input(struct vc4_compile *c, int attr)
-{
- c->inputs[attr * 4 + 0] = qir_FSUB(c,
- qir_uniform_f(c, 1.0),
- qir_FMUL(c,
- qir_ITOF(c, qir_FRAG_REV_FLAG(c)),
- qir_uniform_f(c, 2.0)));
- c->inputs[attr * 4 + 1] = qir_uniform_f(c, 0.0);
- c->inputs[attr * 4 + 2] = qir_uniform_f(c, 0.0);
- c->inputs[attr * 4 + 3] = qir_uniform_f(c, 1.0);
-}
-
-static void
add_output(struct vc4_compile *c,
uint32_t decl_offset,
uint8_t semantic_name,
@@ -884,12 +832,38 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
for (int i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
srcs[i] = ntq_get_src(c, instr->src[i].src,
instr->src[i].swizzle[0]);
- struct qreg *dest = ntq_get_dest(c, instr->dest.dest);
+ struct qreg *dest = ntq_get_dest(c, &instr->dest.dest);
for (int i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
dest[i] = srcs[i];
return;
}
+ if (instr->op == nir_op_pack_unorm_4x8) {
+ struct qreg result;
+ for (int i = 0; i < 4; i++) {
+ struct qreg src = ntq_get_src(c, instr->src[0].src,
+ instr->src[0].swizzle[i]);
+ if (i == 0)
+ result = qir_PACK_8888_F(c, src);
+ else
+ result = qir_PACK_8_F(c, result, src, i);
+ }
+ struct qreg *dest = ntq_get_dest(c, &instr->dest.dest);
+ *dest = result;
+ return;
+ }
+
+ if (instr->op == nir_op_unpack_unorm_4x8) {
+ struct qreg src = ntq_get_src(c, instr->src[0].src,
+ instr->src[0].swizzle[0]);
+ struct qreg *dest = ntq_get_dest(c, &instr->dest.dest);
+ for (int i = 0; i < 4; i++) {
+ if (instr->dest.write_mask & (1 << i))
+ dest[i] = qir_UNPACK_8_F(c, src, i);
+ }
+ return;
+ }
+
/* General case: We can just grab the one used channel per src. */
struct qreg src[nir_op_infos[instr->op].num_inputs];
for (int i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
@@ -898,7 +872,7 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
/* Pick the channel to store the output in. */
assert(!instr->dest.saturate);
- struct qreg *dest = ntq_get_dest(c, instr->dest.dest);
+ struct qreg *dest = ntq_get_dest(c, &instr->dest.dest);
assert(util_is_power_of_two(instr->dest.write_mask));
dest += ffs(instr->dest.write_mask) - 1;
@@ -1092,167 +1066,6 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
}
}
-static struct qreg
-vc4_blend_channel(struct vc4_compile *c,
- struct qreg *dst,
- struct qreg *src,
- struct qreg val,
- unsigned factor,
- int channel)
-{
- switch(factor) {
- case PIPE_BLENDFACTOR_ONE:
- return val;
- case PIPE_BLENDFACTOR_SRC_COLOR:
- return qir_FMUL(c, val, src[channel]);
- case PIPE_BLENDFACTOR_SRC_ALPHA:
- return qir_FMUL(c, val, src[3]);
- case PIPE_BLENDFACTOR_DST_ALPHA:
- return qir_FMUL(c, val, dst[3]);
- case PIPE_BLENDFACTOR_DST_COLOR:
- return qir_FMUL(c, val, dst[channel]);
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
- if (channel != 3) {
- return qir_FMUL(c,
- val,
- qir_FMIN(c,
- src[3],
- qir_FSUB(c,
- qir_uniform_f(c, 1.0),
- dst[3])));
- } else {
- return val;
- }
- case PIPE_BLENDFACTOR_CONST_COLOR:
- return qir_FMUL(c, val,
- qir_uniform(c, QUNIFORM_BLEND_CONST_COLOR,
- channel));
- case PIPE_BLENDFACTOR_CONST_ALPHA:
- return qir_FMUL(c, val,
- qir_uniform(c, QUNIFORM_BLEND_CONST_COLOR, 3));
- case PIPE_BLENDFACTOR_ZERO:
- return qir_uniform_f(c, 0.0);
- case PIPE_BLENDFACTOR_INV_SRC_COLOR:
- return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
- src[channel]));
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
- return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
- src[3]));
- case PIPE_BLENDFACTOR_INV_DST_ALPHA:
- return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
- dst[3]));
- case PIPE_BLENDFACTOR_INV_DST_COLOR:
- return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
- dst[channel]));
- case PIPE_BLENDFACTOR_INV_CONST_COLOR:
- return qir_FMUL(c, val,
- qir_FSUB(c, qir_uniform_f(c, 1.0),
- qir_uniform(c,
- QUNIFORM_BLEND_CONST_COLOR,
- channel)));
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
- return qir_FMUL(c, val,
- qir_FSUB(c, qir_uniform_f(c, 1.0),
- qir_uniform(c,
- QUNIFORM_BLEND_CONST_COLOR,
- 3)));
-
- default:
- case PIPE_BLENDFACTOR_SRC1_COLOR:
- case PIPE_BLENDFACTOR_SRC1_ALPHA:
- case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
- case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
- /* Unsupported. */
- fprintf(stderr, "Unknown blend factor %d\n", factor);
- return val;
- }
-}
-
-static struct qreg
-vc4_blend_func(struct vc4_compile *c,
- struct qreg src, struct qreg dst,
- unsigned func)
-{
- switch (func) {
- case PIPE_BLEND_ADD:
- return qir_FADD(c, src, dst);
- case PIPE_BLEND_SUBTRACT:
- return qir_FSUB(c, src, dst);
- case PIPE_BLEND_REVERSE_SUBTRACT:
- return qir_FSUB(c, dst, src);
- case PIPE_BLEND_MIN:
- return qir_FMIN(c, src, dst);
- case PIPE_BLEND_MAX:
- return qir_FMAX(c, src, dst);
-
- default:
- /* Unsupported. */
- fprintf(stderr, "Unknown blend func %d\n", func);
- return src;
-
- }
-}
-
-/**
- * Implements fixed function blending in shader code.
- *
- * VC4 doesn't have any hardware support for blending. Instead, you read the
- * current contents of the destination from the tile buffer after having
- * waited for the scoreboard (which is handled by vc4_qpu_emit.c), then do
- * math using your output color and that destination value, and update the
- * output color appropriately.
- */
-static void
-vc4_blend(struct vc4_compile *c, struct qreg *result,
- struct qreg *dst_color, struct qreg *src_color)
-{
- struct pipe_rt_blend_state *blend = &c->fs_key->blend;
-
- if (!blend->blend_enable) {
- for (int i = 0; i < 4; i++)
- result[i] = src_color[i];
- return;
- }
-
- struct qreg clamped_src[4];
- struct qreg clamped_dst[4];
- for (int i = 0; i < 4; i++) {
- clamped_src[i] = qir_SAT(c, src_color[i]);
- clamped_dst[i] = qir_SAT(c, dst_color[i]);
- }
- src_color = clamped_src;
- dst_color = clamped_dst;
-
- struct qreg src_blend[4], dst_blend[4];
- for (int i = 0; i < 3; i++) {
- src_blend[i] = vc4_blend_channel(c,
- dst_color, src_color,
- src_color[i],
- blend->rgb_src_factor, i);
- dst_blend[i] = vc4_blend_channel(c,
- dst_color, src_color,
- dst_color[i],
- blend->rgb_dst_factor, i);
- }
- src_blend[3] = vc4_blend_channel(c,
- dst_color, src_color,
- src_color[3],
- blend->alpha_src_factor, 3);
- dst_blend[3] = vc4_blend_channel(c,
- dst_color, src_color,
- dst_color[3],
- blend->alpha_dst_factor, 3);
-
- for (int i = 0; i < 3; i++) {
- result[i] = vc4_blend_func(c,
- src_blend[i], dst_blend[i],
- blend->rgb_func);
- }
- result[3] = vc4_blend_func(c,
- src_blend[3], dst_blend[3],
- blend->alpha_func);
-}
-
static void
clip_distance_discard(struct vc4_compile *c)
{
@@ -1276,167 +1089,15 @@ clip_distance_discard(struct vc4_compile *c)
}
static void
-alpha_test_discard(struct vc4_compile *c)
-{
- struct qreg src_alpha;
- struct qreg alpha_ref = qir_uniform(c, QUNIFORM_ALPHA_REF, 0);
-
- if (!c->fs_key->alpha_test)
- return;
-
- if (c->output_color_index != -1)
- src_alpha = c->outputs[c->output_color_index + 3];
- else
- src_alpha = qir_uniform_f(c, 1.0);
-
- if (c->discard.file == QFILE_NULL)
- c->discard = qir_uniform_ui(c, 0);
-
- switch (c->fs_key->alpha_test_func) {
- case PIPE_FUNC_NEVER:
- c->discard = qir_uniform_ui(c, ~0);
- break;
- case PIPE_FUNC_ALWAYS:
- break;
- case PIPE_FUNC_EQUAL:
- qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
- c->discard = qir_SEL_X_Y_ZS(c, c->discard,
- qir_uniform_ui(c, ~0));
- break;
- case PIPE_FUNC_NOTEQUAL:
- qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
- c->discard = qir_SEL_X_Y_ZC(c, c->discard,
- qir_uniform_ui(c, ~0));
- break;
- case PIPE_FUNC_GREATER:
- qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
- c->discard = qir_SEL_X_Y_NC(c, c->discard,
- qir_uniform_ui(c, ~0));
- break;
- case PIPE_FUNC_GEQUAL:
- qir_SF(c, qir_FSUB(c, alpha_ref, src_alpha));
- c->discard = qir_SEL_X_Y_NS(c, c->discard,
- qir_uniform_ui(c, ~0));
- break;
- case PIPE_FUNC_LESS:
- qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
- c->discard = qir_SEL_X_Y_NS(c, c->discard,
- qir_uniform_ui(c, ~0));
- break;
- case PIPE_FUNC_LEQUAL:
- qir_SF(c, qir_FSUB(c, alpha_ref, src_alpha));
- c->discard = qir_SEL_X_Y_NC(c, c->discard,
- qir_uniform_ui(c, ~0));
- break;
- }
-}
-
-static struct qreg
-vc4_logicop(struct vc4_compile *c, struct qreg src, struct qreg dst)
-{
- switch (c->fs_key->logicop_func) {
- case PIPE_LOGICOP_CLEAR:
- return qir_uniform_f(c, 0.0);
- case PIPE_LOGICOP_NOR:
- return qir_NOT(c, qir_OR(c, src, dst));
- case PIPE_LOGICOP_AND_INVERTED:
- return qir_AND(c, qir_NOT(c, src), dst);
- case PIPE_LOGICOP_COPY_INVERTED:
- return qir_NOT(c, src);
- case PIPE_LOGICOP_AND_REVERSE:
- return qir_AND(c, src, qir_NOT(c, dst));
- case PIPE_LOGICOP_INVERT:
- return qir_NOT(c, dst);
- case PIPE_LOGICOP_XOR:
- return qir_XOR(c, src, dst);
- case PIPE_LOGICOP_NAND:
- return qir_NOT(c, qir_AND(c, src, dst));
- case PIPE_LOGICOP_AND:
- return qir_AND(c, src, dst);
- case PIPE_LOGICOP_EQUIV:
- return qir_NOT(c, qir_XOR(c, src, dst));
- case PIPE_LOGICOP_NOOP:
- return dst;
- case PIPE_LOGICOP_OR_INVERTED:
- return qir_OR(c, qir_NOT(c, src), dst);
- case PIPE_LOGICOP_OR_REVERSE:
- return qir_OR(c, src, qir_NOT(c, dst));
- case PIPE_LOGICOP_OR:
- return qir_OR(c, src, dst);
- case PIPE_LOGICOP_SET:
- return qir_uniform_ui(c, ~0);
- case PIPE_LOGICOP_COPY:
- default:
- return src;
- }
-}
-
-static void
emit_frag_end(struct vc4_compile *c)
{
clip_distance_discard(c);
- alpha_test_discard(c);
-
- enum pipe_format color_format = c->fs_key->color_format;
- const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
- struct qreg tlb_read_color[4] = { c->undef, c->undef, c->undef, c->undef };
- struct qreg dst_color[4] = { c->undef, c->undef, c->undef, c->undef };
- struct qreg linear_dst_color[4] = { c->undef, c->undef, c->undef, c->undef };
- struct qreg packed_dst_color = c->undef;
-
- if (c->fs_key->blend.blend_enable ||
- c->fs_key->blend.colormask != 0xf ||
- c->fs_key->logicop_func != PIPE_LOGICOP_COPY) {
- struct qreg r4 = qir_TLB_COLOR_READ(c);
- for (int i = 0; i < 4; i++)
- tlb_read_color[i] = qir_R4_UNPACK(c, r4, i);
- for (int i = 0; i < 4; i++) {
- dst_color[i] = get_swizzled_channel(c,
- tlb_read_color,
- format_swiz[i]);
- if (util_format_is_srgb(color_format) && i != 3) {
- linear_dst_color[i] =
- qir_srgb_decode(c, dst_color[i]);
- } else {
- linear_dst_color[i] = dst_color[i];
- }
- }
- /* Save the packed value for logic ops. Can't reuse r4
- * because other things might smash it (like sRGB)
- */
- packed_dst_color = qir_MOV(c, r4);
- }
-
- struct qreg blend_color[4];
- struct qreg undef_array[4] = {
- c->undef, c->undef, c->undef, c->undef
- };
- vc4_blend(c, blend_color, linear_dst_color,
- (c->output_color_index != -1 ?
- c->outputs + c->output_color_index :
- undef_array));
-
- if (util_format_is_srgb(color_format)) {
- for (int i = 0; i < 3; i++)
- blend_color[i] = qir_srgb_encode(c, blend_color[i]);
- }
-
- /* Debug: Sometimes you're getting a black output and just want to see
- * if the FS is getting executed at all. Spam magenta into the color
- * output.
- */
- if (0) {
- blend_color[0] = qir_uniform_f(c, 1.0);
- blend_color[1] = qir_uniform_f(c, 0.0);
- blend_color[2] = qir_uniform_f(c, 1.0);
- blend_color[3] = qir_uniform_f(c, 0.5);
- }
-
- struct qreg swizzled_outputs[4];
- for (int i = 0; i < 4; i++) {
- swizzled_outputs[i] = get_swizzled_channel(c, blend_color,
- format_swiz[i]);
+ struct qreg color;
+ if (c->output_color_index != -1) {
+ color = c->outputs[c->output_color_index];
+ } else {
+ color = qir_uniform_ui(c, 0);
}
if (c->discard.file != QFILE_NULL)
@@ -1463,47 +1124,7 @@ emit_frag_end(struct vc4_compile *c)
qir_TLB_Z_WRITE(c, z);
}
- struct qreg packed_color = c->undef;
- for (int i = 0; i < 4; i++) {
- if (swizzled_outputs[i].file == QFILE_NULL)
- continue;
- if (packed_color.file == QFILE_NULL) {
- packed_color = qir_PACK_8888_F(c, swizzled_outputs[i]);
- } else {
- packed_color = qir_PACK_8_F(c,
- packed_color,
- swizzled_outputs[i],
- i);
- }
- }
-
- if (packed_color.file == QFILE_NULL)
- packed_color = qir_uniform_ui(c, 0);
-
- if (c->fs_key->logicop_func != PIPE_LOGICOP_COPY) {
- packed_color = vc4_logicop(c, packed_color, packed_dst_color);
- }
-
- /* If the bit isn't set in the color mask, then just return the
- * original dst color, instead.
- */
- uint32_t colormask = 0xffffffff;
- for (int i = 0; i < 4; i++) {
- if (format_swiz[i] < 4 &&
- !(c->fs_key->blend.colormask & (1 << format_swiz[i]))) {
- colormask &= ~(0xff << (i * 8));
- }
- }
- if (colormask != 0xffffffff) {
- packed_color = qir_OR(c,
- qir_AND(c, packed_color,
- qir_uniform_ui(c, colormask)),
- qir_AND(c, packed_dst_color,
- qir_uniform_ui(c, ~colormask)));
- }
-
- qir_emit(c, qir_inst(QOP_TLB_COLOR_WRITE, c->undef,
- packed_color, c->undef));
+ qir_TLB_COLOR_WRITE(c, color);
}
static void
@@ -1695,6 +1316,7 @@ vc4_optimize_nir(struct nir_shader *s)
progress = nir_opt_peephole_select(s) || progress;
progress = nir_opt_algebraic(s) || progress;
progress = nir_opt_constant_folding(s) || progress;
+ progress = nir_opt_undef(s) || progress;
} while (progress);
}
@@ -1736,6 +1358,7 @@ ntq_setup_inputs(struct vc4_compile *c)
unsigned loc = var->data.driver_location;
assert(array_len == 1);
+ (void)array_len;
resize_qreg_array(c, &c->inputs, &c->inputs_array_size,
(loc + 1) * 4);
@@ -1743,11 +1366,12 @@ ntq_setup_inputs(struct vc4_compile *c)
if (semantic_name == TGSI_SEMANTIC_POSITION) {
emit_fragcoord_input(c, loc);
} else if (semantic_name == TGSI_SEMANTIC_FACE) {
- emit_face_input(c, loc);
+ c->inputs[loc * 4 + 0] = qir_FRAG_REV_FLAG(c);
} else if (semantic_name == TGSI_SEMANTIC_GENERIC &&
(c->fs_key->point_sprite_mask &
(1 << semantic_index))) {
- emit_point_coord_input(c, loc);
+ c->inputs[loc * 4 + 0] = c->point_x;
+ c->inputs[loc * 4 + 1] = c->point_y;
} else {
emit_fragment_input(c, loc,
semantic_name,
@@ -1770,6 +1394,13 @@ ntq_setup_outputs(struct vc4_compile *c)
unsigned loc = var->data.driver_location * 4;
assert(array_len == 1);
+ (void)array_len;
+
+ /* NIR hack to pass through
+ * TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS */
+ if (semantic_name == TGSI_SEMANTIC_COLOR &&
+ semantic_index == -1)
+ semantic_index = 0;
for (int i = 0; i < 4; i++) {
add_output(c,
@@ -1834,8 +1465,7 @@ ntq_setup_registers(struct vc4_compile *c, struct exec_list *list)
static void
ntq_emit_load_const(struct vc4_compile *c, nir_load_const_instr *instr)
{
- struct qreg *qregs = ralloc_array(c->def_ht, struct qreg,
- instr->def.num_components);
+ struct qreg *qregs = ntq_init_ssa_def(c, &instr->def);
for (int i = 0; i < instr->def.num_components; i++)
qregs[i] = qir_uniform_ui(c, instr->value.u[i]);
@@ -1843,47 +1473,59 @@ ntq_emit_load_const(struct vc4_compile *c, nir_load_const_instr *instr)
}
static void
+ntq_emit_ssa_undef(struct vc4_compile *c, nir_ssa_undef_instr *instr)
+{
+ struct qreg *qregs = ntq_init_ssa_def(c, &instr->def);
+
+ /* QIR needs there to be *some* value, so pick 0 (same as for
+ * ntq_setup_registers()).
+ */
+ for (int i = 0; i < instr->def.num_components; i++)
+ qregs[i] = qir_uniform_ui(c, 0);
+}
+
+static void
ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
{
const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
struct qreg *dest = NULL;
if (info->has_dest) {
- dest = ntq_get_dest(c, instr->dest);
+ dest = ntq_get_dest(c, &instr->dest);
}
switch (instr->intrinsic) {
case nir_intrinsic_load_uniform:
- for (int i = 0; i < instr->num_components; i++) {
- dest[i] = qir_uniform(c, QUNIFORM_UNIFORM,
- instr->const_index[0] * 4 + i);
+ assert(instr->num_components == 1);
+ if (instr->const_index[0] < VC4_NIR_STATE_UNIFORM_OFFSET) {
+ *dest = qir_uniform(c, QUNIFORM_UNIFORM,
+ instr->const_index[0]);
+ } else {
+ *dest = qir_uniform(c, instr->const_index[0] -
+ VC4_NIR_STATE_UNIFORM_OFFSET,
+ 0);
}
break;
case nir_intrinsic_load_uniform_indirect:
- for (int i = 0; i < instr->num_components; i++) {
- dest[i] = indirect_uniform_load(c,
- ntq_get_src(c, instr->src[0], 0),
- (instr->const_index[0] *
- 4 + i) * sizeof(float));
- }
+ *dest = indirect_uniform_load(c, instr);
break;
case nir_intrinsic_load_input:
- for (int i = 0; i < instr->num_components; i++)
- dest[i] = c->inputs[instr->const_index[0] * 4 + i];
-
+ assert(instr->num_components == 1);
+ if (instr->const_index[0] == VC4_NIR_TLB_COLOR_READ_INPUT) {
+ *dest = qir_TLB_COLOR_READ(c);
+ } else {
+ *dest = c->inputs[instr->const_index[0]];
+ }
break;
case nir_intrinsic_store_output:
- for (int i = 0; i < instr->num_components; i++) {
- c->outputs[instr->const_index[0] * 4 + i] =
- qir_MOV(c, ntq_get_src(c, instr->src[0], i));
- }
- c->num_outputs = MAX2(c->num_outputs,
- instr->const_index[0] * 4 +
- instr->num_components + 1);
+ assert(instr->num_components == 1);
+ c->outputs[instr->const_index[0]] =
+ qir_MOV(c, ntq_get_src(c, instr->src[0], 0));
+ c->num_outputs = MAX2(c->num_outputs, instr->const_index[0] + 1);
break;
case nir_intrinsic_discard:
@@ -1927,6 +1569,10 @@ ntq_emit_instr(struct vc4_compile *c, nir_instr *instr)
ntq_emit_load_const(c, nir_instr_as_load_const(instr));
break;
+ case nir_instr_type_ssa_undef:
+ ntq_emit_ssa_undef(c, nir_instr_as_ssa_undef(instr));
+ break;
+
case nir_instr_type_tex:
ntq_emit_tex(c, nir_instr_as_tex(instr));
break;
@@ -2084,13 +1730,17 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
c->s = tgsi_to_nir(tokens, &nir_options);
nir_opt_global_to_local(c->s);
nir_convert_to_ssa(c->s);
+ if (stage == QSTAGE_FRAG)
+ vc4_nir_lower_blend(c);
+ vc4_nir_lower_io(c);
nir_lower_idiv(c->s);
+ nir_lower_load_const_to_scalar(c->s);
vc4_optimize_nir(c->s);
nir_remove_dead_variables(c->s);
- nir_convert_from_ssa(c->s);
+ nir_convert_from_ssa(c->s, true);
if (vc4_debug & VC4_DEBUG_SHADERDB) {
fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d NIR instructions\n",
@@ -2187,6 +1837,8 @@ copy_uniform_state_to_shader(struct vc4_compiled_shader *shader,
memcpy(uinfo->contents, c->uniform_contents,
count * sizeof(*uinfo->contents));
uinfo->num_texture_samples = c->num_texture_samples;
+
+ vc4_set_shader_uniform_dirty_flags(shader);
}
static struct vc4_compiled_shader *
@@ -2259,9 +1911,8 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage,
}
copy_uniform_state_to_shader(shader, c);
- shader->bo = vc4_bo_alloc_mem(vc4->screen, c->qpu_insts,
- c->qpu_inst_count * sizeof(uint64_t),
- "code");
+ shader->bo = vc4_bo_alloc_shader(vc4->screen, c->qpu_insts,
+ c->qpu_inst_count * sizeof(uint64_t));
/* Copy the compiler UBO range state to the compiled shader, dropping
* out arrays that were never referenced by an indirect load.
@@ -2288,10 +1939,12 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage,
}
}
if (shader->ubo_size) {
- fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d UBO uniforms\n",
- qir_get_stage_name(c->stage),
- c->program_id, c->variant_id,
- shader->ubo_size / 4);
+ if (vc4_debug & VC4_DEBUG_SHADERDB) {
+ fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d UBO uniforms\n",
+ qir_get_stage_name(c->stage),
+ c->program_id, c->variant_id,
+ shader->ubo_size / 4);
+ }
}
qir_compile_destroy(c);
@@ -2421,9 +2074,20 @@ vc4_update_compiled_vs(struct vc4_context *vc4, uint8_t prim_mode)
(prim_mode == PIPE_PRIM_POINTS &&
vc4->rasterizer->base.point_size_per_vertex);
- vc4->prog.vs = vc4_get_compiled_shader(vc4, QSTAGE_VERT, &key->base);
+ struct vc4_compiled_shader *vs =
+ vc4_get_compiled_shader(vc4, QSTAGE_VERT, &key->base);
+ if (vs != vc4->prog.vs) {
+ vc4->prog.vs = vs;
+ vc4->dirty |= VC4_DIRTY_COMPILED_VS;
+ }
+
key->is_coord = true;
- vc4->prog.cs = vc4_get_compiled_shader(vc4, QSTAGE_COORD, &key->base);
+ struct vc4_compiled_shader *cs =
+ vc4_get_compiled_shader(vc4, QSTAGE_COORD, &key->base);
+ if (cs != vc4->prog.cs) {
+ vc4->prog.cs = cs;
+ vc4->dirty |= VC4_DIRTY_COMPILED_CS;
+ }
}
void
@@ -2490,305 +2154,6 @@ vc4_shader_state_delete(struct pipe_context *pctx, void *hwcso)
free(so);
}
-static uint32_t translate_wrap(uint32_t p_wrap, bool using_nearest)
-{
- switch (p_wrap) {
- case PIPE_TEX_WRAP_REPEAT:
- return 0;
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- return 1;
- case PIPE_TEX_WRAP_MIRROR_REPEAT:
- return 2;
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- return 3;
- case PIPE_TEX_WRAP_CLAMP:
- return (using_nearest ? 1 : 3);
- default:
- fprintf(stderr, "Unknown wrap mode %d\n", p_wrap);
- assert(!"not reached");
- return 0;
- }
-}
-
-static void
-write_texture_p0(struct vc4_context *vc4,
- struct vc4_texture_stateobj *texstate,
- uint32_t unit)
-{
- struct pipe_sampler_view *texture = texstate->textures[unit];
- struct vc4_resource *rsc = vc4_resource(texture->texture);
-
- cl_reloc(vc4, &vc4->uniforms, rsc->bo,
- VC4_SET_FIELD(rsc->slices[0].offset >> 12, VC4_TEX_P0_OFFSET) |
- VC4_SET_FIELD(texture->u.tex.last_level -
- texture->u.tex.first_level, VC4_TEX_P0_MIPLVLS) |
- VC4_SET_FIELD(texture->target == PIPE_TEXTURE_CUBE,
- VC4_TEX_P0_CMMODE) |
- VC4_SET_FIELD(rsc->vc4_format & 15, VC4_TEX_P0_TYPE));
-}
-
-static void
-write_texture_p1(struct vc4_context *vc4,
- struct vc4_texture_stateobj *texstate,
- uint32_t unit)
-{
- struct pipe_sampler_view *texture = texstate->textures[unit];
- struct vc4_resource *rsc = vc4_resource(texture->texture);
- struct pipe_sampler_state *sampler = texstate->samplers[unit];
- static const uint8_t minfilter_map[6] = {
- VC4_TEX_P1_MINFILT_NEAR_MIP_NEAR,
- VC4_TEX_P1_MINFILT_LIN_MIP_NEAR,
- VC4_TEX_P1_MINFILT_NEAR_MIP_LIN,
- VC4_TEX_P1_MINFILT_LIN_MIP_LIN,
- VC4_TEX_P1_MINFILT_NEAREST,
- VC4_TEX_P1_MINFILT_LINEAR,
- };
- static const uint32_t magfilter_map[] = {
- [PIPE_TEX_FILTER_NEAREST] = VC4_TEX_P1_MAGFILT_NEAREST,
- [PIPE_TEX_FILTER_LINEAR] = VC4_TEX_P1_MAGFILT_LINEAR,
- };
-
- bool either_nearest =
- (sampler->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST ||
- sampler->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST);
-
- cl_aligned_u32(&vc4->uniforms,
- VC4_SET_FIELD(rsc->vc4_format >> 4, VC4_TEX_P1_TYPE4) |
- VC4_SET_FIELD(texture->texture->height0 & 2047,
- VC4_TEX_P1_HEIGHT) |
- VC4_SET_FIELD(texture->texture->width0 & 2047,
- VC4_TEX_P1_WIDTH) |
- VC4_SET_FIELD(magfilter_map[sampler->mag_img_filter],
- VC4_TEX_P1_MAGFILT) |
- VC4_SET_FIELD(minfilter_map[sampler->min_mip_filter * 2 +
- sampler->min_img_filter],
- VC4_TEX_P1_MINFILT) |
- VC4_SET_FIELD(translate_wrap(sampler->wrap_s, either_nearest),
- VC4_TEX_P1_WRAP_S) |
- VC4_SET_FIELD(translate_wrap(sampler->wrap_t, either_nearest),
- VC4_TEX_P1_WRAP_T));
-}
-
-static void
-write_texture_p2(struct vc4_context *vc4,
- struct vc4_texture_stateobj *texstate,
- uint32_t data)
-{
- uint32_t unit = data & 0xffff;
- struct pipe_sampler_view *texture = texstate->textures[unit];
- struct vc4_resource *rsc = vc4_resource(texture->texture);
-
- cl_aligned_u32(&vc4->uniforms,
- VC4_SET_FIELD(VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE,
- VC4_TEX_P2_PTYPE) |
- VC4_SET_FIELD(rsc->cube_map_stride >> 12, VC4_TEX_P2_CMST) |
- VC4_SET_FIELD((data >> 16) & 1, VC4_TEX_P2_BSLOD));
-}
-
-
-#define SWIZ(x,y,z,w) { \
- UTIL_FORMAT_SWIZZLE_##x, \
- UTIL_FORMAT_SWIZZLE_##y, \
- UTIL_FORMAT_SWIZZLE_##z, \
- UTIL_FORMAT_SWIZZLE_##w \
-}
-
-static void
-write_texture_border_color(struct vc4_context *vc4,
- struct vc4_texture_stateobj *texstate,
- uint32_t unit)
-{
- struct pipe_sampler_state *sampler = texstate->samplers[unit];
- struct pipe_sampler_view *texture = texstate->textures[unit];
- struct vc4_resource *rsc = vc4_resource(texture->texture);
- union util_color uc;
-
- const struct util_format_description *tex_format_desc =
- util_format_description(texture->format);
-
- float border_color[4];
- for (int i = 0; i < 4; i++)
- border_color[i] = sampler->border_color.f[i];
- if (util_format_is_srgb(texture->format)) {
- for (int i = 0; i < 3; i++)
- border_color[i] =
- util_format_linear_to_srgb_float(border_color[i]);
- }
-
- /* Turn the border color into the layout of channels that it would
- * have when stored as texture contents.
- */
- float storage_color[4];
- util_format_unswizzle_4f(storage_color,
- border_color,
- tex_format_desc->swizzle);
-
- /* Now, pack so that when the vc4_format-sampled texture contents are
- * replaced with our border color, the vc4_get_format_swizzle()
- * swizzling will get the right channels.
- */
- if (util_format_is_depth_or_stencil(texture->format)) {
- uc.ui[0] = util_pack_z(PIPE_FORMAT_Z24X8_UNORM,
- sampler->border_color.f[0]) << 8;
- } else {
- switch (rsc->vc4_format) {
- default:
- case VC4_TEXTURE_TYPE_RGBA8888:
- util_pack_color(storage_color,
- PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
- break;
- case VC4_TEXTURE_TYPE_RGBA4444:
- util_pack_color(storage_color,
- PIPE_FORMAT_A8B8G8R8_UNORM, &uc);
- break;
- case VC4_TEXTURE_TYPE_RGB565:
- util_pack_color(storage_color,
- PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
- break;
- case VC4_TEXTURE_TYPE_ALPHA:
- uc.ui[0] = float_to_ubyte(storage_color[0]) << 24;
- break;
- case VC4_TEXTURE_TYPE_LUMALPHA:
- uc.ui[0] = ((float_to_ubyte(storage_color[1]) << 24) |
- (float_to_ubyte(storage_color[0]) << 0));
- break;
- }
- }
-
- cl_aligned_u32(&vc4->uniforms, uc.ui[0]);
-}
-
-static uint32_t
-get_texrect_scale(struct vc4_texture_stateobj *texstate,
- enum quniform_contents contents,
- uint32_t data)
-{
- struct pipe_sampler_view *texture = texstate->textures[data];
- uint32_t dim;
-
- if (contents == QUNIFORM_TEXRECT_SCALE_X)
- dim = texture->texture->width0;
- else
- dim = texture->texture->height0;
-
- return fui(1.0f / dim);
-}
-
-static struct vc4_bo *
-vc4_upload_ubo(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
- const uint32_t *gallium_uniforms)
-{
- if (!shader->ubo_size)
- return NULL;
-
- struct vc4_bo *ubo = vc4_bo_alloc(vc4->screen, shader->ubo_size, "ubo");
- uint32_t *data = vc4_bo_map(ubo);
- for (uint32_t i = 0; i < shader->num_ubo_ranges; i++) {
- memcpy(data + shader->ubo_ranges[i].dst_offset,
- gallium_uniforms + shader->ubo_ranges[i].src_offset,
- shader->ubo_ranges[i].size);
- }
-
- return ubo;
-}
-
-void
-vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
- struct vc4_constbuf_stateobj *cb,
- struct vc4_texture_stateobj *texstate)
-{
- struct vc4_shader_uniform_info *uinfo = &shader->uniforms;
- const uint32_t *gallium_uniforms = cb->cb[0].user_buffer;
- struct vc4_bo *ubo = vc4_upload_ubo(vc4, shader, gallium_uniforms);
-
- cl_ensure_space(&vc4->uniforms, (uinfo->count +
- uinfo->num_texture_samples) * 4);
-
- cl_start_shader_reloc(&vc4->uniforms, uinfo->num_texture_samples);
-
- for (int i = 0; i < uinfo->count; i++) {
-
- switch (uinfo->contents[i]) {
- case QUNIFORM_CONSTANT:
- cl_aligned_u32(&vc4->uniforms, uinfo->data[i]);
- break;
- case QUNIFORM_UNIFORM:
- cl_aligned_u32(&vc4->uniforms,
- gallium_uniforms[uinfo->data[i]]);
- break;
- case QUNIFORM_VIEWPORT_X_SCALE:
- cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[0] * 16.0f);
- break;
- case QUNIFORM_VIEWPORT_Y_SCALE:
- cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[1] * 16.0f);
- break;
-
- case QUNIFORM_VIEWPORT_Z_OFFSET:
- cl_aligned_f(&vc4->uniforms, vc4->viewport.translate[2]);
- break;
- case QUNIFORM_VIEWPORT_Z_SCALE:
- cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[2]);
- break;
-
- case QUNIFORM_USER_CLIP_PLANE:
- cl_aligned_f(&vc4->uniforms,
- vc4->clip.ucp[uinfo->data[i] / 4][uinfo->data[i] % 4]);
- break;
-
- case QUNIFORM_TEXTURE_CONFIG_P0:
- write_texture_p0(vc4, texstate, uinfo->data[i]);
- break;
-
- case QUNIFORM_TEXTURE_CONFIG_P1:
- write_texture_p1(vc4, texstate, uinfo->data[i]);
- break;
-
- case QUNIFORM_TEXTURE_CONFIG_P2:
- write_texture_p2(vc4, texstate, uinfo->data[i]);
- break;
-
- case QUNIFORM_UBO_ADDR:
- cl_aligned_reloc(vc4, &vc4->uniforms, ubo, 0);
- break;
-
- case QUNIFORM_TEXTURE_BORDER_COLOR:
- write_texture_border_color(vc4, texstate, uinfo->data[i]);
- break;
-
- case QUNIFORM_TEXRECT_SCALE_X:
- case QUNIFORM_TEXRECT_SCALE_Y:
- cl_aligned_u32(&vc4->uniforms,
- get_texrect_scale(texstate,
- uinfo->contents[i],
- uinfo->data[i]));
- break;
-
- case QUNIFORM_BLEND_CONST_COLOR:
- cl_aligned_f(&vc4->uniforms,
- CLAMP(vc4->blend_color.color[uinfo->data[i]], 0, 1));
- break;
-
- case QUNIFORM_STENCIL:
- cl_aligned_u32(&vc4->uniforms,
- vc4->zsa->stencil_uniforms[uinfo->data[i]] |
- (uinfo->data[i] <= 1 ?
- (vc4->stencil_ref.ref_value[uinfo->data[i]] << 8) :
- 0));
- break;
-
- case QUNIFORM_ALPHA_REF:
- cl_aligned_f(&vc4->uniforms,
- vc4->zsa->base.alpha.ref_value);
- break;
- }
-#if 0
- uint32_t written_val = *(uint32_t *)(vc4->uniforms.next - 4);
- fprintf(stderr, "%p: %d / 0x%08x (%f)\n",
- shader, i, written_val, uif(written_val));
-#endif
- }
-}
-
static void
vc4_fp_state_bind(struct pipe_context *pctx, void *hwcso)
{
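Several hunks above depend on the two sentinel values defined in vc4_qir.h (further down in this diff): the NIR lowering biases a nir_load_uniform index by VC4_NIR_STATE_UNIFORM_OFFSET to request a state-dependent QUNIFORM_* value, and ntq_emit_intrinsic() undoes the bias. A minimal sketch of that round trip, using only definitions visible in this patch:

#include <assert.h>
#include <stdint.h>

#define VC4_NIR_STATE_UNIFORM_OFFSET 2000000000

/* Encode: what the lowering pass stores in const_index[0]. */
static uint32_t
encode_state_uniform(uint32_t quniform_contents)
{
        return VC4_NIR_STATE_UNIFORM_OFFSET + quniform_contents;
}

/* Decode: what ntq_emit_intrinsic() recovers for qir_uniform(). */
static uint32_t
decode_state_uniform(uint32_t const_index)
{
        assert(const_index >= VC4_NIR_STATE_UNIFORM_OFFSET);
        return const_index - VC4_NIR_STATE_UNIFORM_OFFSET;
}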
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 1c96ef4795f..254140a72f5 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -96,10 +96,6 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_TEX_B] = { "tex_b", 0, 2 },
[QOP_TEX_DIRECT] = { "tex_direct", 0, 2 },
[QOP_TEX_RESULT] = { "tex_result", 1, 0, true },
- [QOP_R4_UNPACK_A] = { "r4_unpack_a", 1, 1 },
- [QOP_R4_UNPACK_B] = { "r4_unpack_b", 1, 1 },
- [QOP_R4_UNPACK_C] = { "r4_unpack_c", 1, 1 },
- [QOP_R4_UNPACK_D] = { "r4_unpack_d", 1, 1 },
[QOP_UNPACK_8A_F] = { "unpack_8a_f", 1, 1 },
[QOP_UNPACK_8B_F] = { "unpack_8b_f", 1, 1 },
[QOP_UNPACK_8C_F] = { "unpack_8c_f", 1, 1 },
@@ -234,20 +230,6 @@ qir_writes_r4(struct qinst *inst)
}
}
-bool
-qir_reads_r4(struct qinst *inst)
-{
- switch (inst->op) {
- case QOP_R4_UNPACK_A:
- case QOP_R4_UNPACK_B:
- case QOP_R4_UNPACK_C:
- case QOP_R4_UNPACK_D:
- return true;
- default:
- return false;
- }
-}
-
static void
qir_print_reg(struct vc4_compile *c, struct qreg reg, bool write)
{
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 732cfd0b306..cade795c12a 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -36,6 +36,11 @@
#include "util/list.h"
#include "util/u_math.h"
+#include "vc4_screen.h"
+#include "pipe/p_state.h"
+
+struct nir_builder;
+
enum qfile {
QFILE_NULL,
QFILE_TEMP,
@@ -155,10 +160,6 @@ enum qop {
* the destination
*/
QOP_TEX_RESULT,
- QOP_R4_UNPACK_A,
- QOP_R4_UNPACK_B,
- QOP_R4_UNPACK_C,
- QOP_R4_UNPACK_D
};
struct queued_qpu_inst {
@@ -243,7 +244,11 @@ enum quniform_contents {
QUNIFORM_TEXTURE_BORDER_COLOR,
- QUNIFORM_BLEND_CONST_COLOR,
+ QUNIFORM_BLEND_CONST_COLOR_X,
+ QUNIFORM_BLEND_CONST_COLOR_Y,
+ QUNIFORM_BLEND_CONST_COLOR_Z,
+ QUNIFORM_BLEND_CONST_COLOR_W,
+
QUNIFORM_STENCIL,
QUNIFORM_ALPHA_REF,
@@ -280,6 +285,52 @@ struct vc4_compiler_ubo_range {
bool used;
};
+struct vc4_key {
+ struct vc4_uncompiled_shader *shader_state;
+ struct {
+ enum pipe_format format;
+ unsigned compare_mode:1;
+ unsigned compare_func:3;
+ unsigned wrap_s:3;
+ unsigned wrap_t:3;
+ uint8_t swizzle[4];
+ } tex[VC4_MAX_TEXTURE_SAMPLERS];
+ uint8_t ucp_enables;
+};
+
+struct vc4_fs_key {
+ struct vc4_key base;
+ enum pipe_format color_format;
+ bool depth_enabled;
+ bool stencil_enabled;
+ bool stencil_twoside;
+ bool stencil_full_writemasks;
+ bool is_points;
+ bool is_lines;
+ bool alpha_test;
+ bool point_coord_upper_left;
+ bool light_twoside;
+ uint8_t alpha_test_func;
+ uint8_t logicop_func;
+ uint32_t point_sprite_mask;
+
+ struct pipe_rt_blend_state blend;
+};
+
+struct vc4_vs_key {
+ struct vc4_key base;
+
+ /**
+ * This is a proxy for the array of FS input semantics, which is
+ * larger than we would want to put in the key.
+ */
+ uint64_t compiled_fs_id;
+
+ enum pipe_format attr_formats[8];
+ bool is_coord;
+ bool per_vertex_point_size;
+};
+
struct vc4_compile {
struct vc4_context *vc4;
nir_shader *s;
@@ -369,6 +420,16 @@ struct vc4_compile {
uint32_t variant_id;
};
+/* Special nir_load_input intrinsic index for loading the current TLB
+ * destination color.
+ */
+#define VC4_NIR_TLB_COLOR_READ_INPUT 2000000000
+
+/* Special offset for nir_load_uniform values to get a QUNIFORM_*
+ * state-dependent value.
+ */
+#define VC4_NIR_STATE_UNIFORM_OFFSET 2000000000
+
struct vc4_compile *qir_compile_init(void);
void qir_compile_destroy(struct vc4_compile *c);
struct qinst *qir_inst(enum qop op, struct qreg dst,
@@ -393,7 +454,6 @@ bool qir_is_multi_instruction(struct qinst *inst);
bool qir_is_tex(struct qinst *inst);
bool qir_depends_on_flags(struct qinst *inst);
bool qir_writes_r4(struct qinst *inst);
-bool qir_reads_r4(struct qinst *inst);
bool qir_src_needs_a_file(struct qinst *inst);
struct qreg qir_follow_movs(struct vc4_compile *c, struct qreg reg);
@@ -409,6 +469,12 @@ bool qir_opt_cse(struct vc4_compile *c);
bool qir_opt_dead_code(struct vc4_compile *c);
bool qir_opt_small_immediates(struct vc4_compile *c);
bool qir_opt_vpm_writes(struct vc4_compile *c);
+void vc4_nir_lower_blend(struct vc4_compile *c);
+void vc4_nir_lower_io(struct vc4_compile *c);
+nir_ssa_def *vc4_nir_get_state_uniform(struct nir_builder *b,
+ enum quniform_contents contents);
+nir_ssa_def *vc4_nir_get_swizzled_channel(struct nir_builder *b,
+ nir_ssa_def **srcs, int swiz);
void qir_lower_uniforms(struct vc4_compile *c);
void qpu_schedule_instructions(struct vc4_compile *c);
@@ -523,27 +589,12 @@ QIR_ALU0(FRAG_W)
QIR_ALU0(FRAG_REV_FLAG)
QIR_ALU0(TEX_RESULT)
QIR_ALU0(TLB_COLOR_READ)
+QIR_NODST_1(TLB_COLOR_WRITE)
QIR_NODST_1(TLB_Z_WRITE)
QIR_NODST_1(TLB_DISCARD_SETUP)
QIR_NODST_1(TLB_STENCIL_SETUP)
static inline struct qreg
-qir_R4_UNPACK(struct vc4_compile *c, struct qreg r4, int i)
-{
- struct qreg t = qir_get_temp(c);
- qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, r4, c->undef));
- return t;
-}
-
-static inline struct qreg
-qir_SEL_X_0_COND(struct vc4_compile *c, int i)
-{
- struct qreg t = qir_get_temp(c);
- qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, c->undef, c->undef));
- return t;
-}
-
-static inline struct qreg
qir_UNPACK_8_F(struct vc4_compile *c, struct qreg src, int i)
{
struct qreg t = qir_get_temp(c);
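The new QIR_NODST_1(TLB_COLOR_WRITE) line generates the qir_TLB_COLOR_WRITE() helper used in vc4_program.c above. The macro body itself is outside this diff; reconstructed from the one-source, no-destination emit pattern visible in the deleted code (qir_emit of QOP_TLB_COLOR_WRITE with c->undef dst), it plausibly expands to:

/* Sketch of the generated helper; the exact macro expansion is an
 * assumption, not part of this patch. */
static inline void
qir_TLB_COLOR_WRITE(struct vc4_compile *c, struct qreg val)
{
        qir_emit(c, qir_inst(QOP_TLB_COLOR_WRITE, c->undef,
                             val, c->undef));
}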
diff --git a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
index 910c89dca79..f087c3b81b5 100644
--- a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
+++ b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
@@ -52,7 +52,7 @@ static void
add_uniform(struct hash_table *ht, struct qreg reg)
{
struct hash_entry *entry;
- void *key = (void *)(uintptr_t)reg.index;
+ void *key = (void *)(uintptr_t)(reg.index + 1);
entry = _mesa_hash_table_search(ht, key);
if (entry) {
@@ -66,7 +66,7 @@ static void
remove_uniform(struct hash_table *ht, struct qreg reg)
{
struct hash_entry *entry;
- void *key = (void *)(uintptr_t)reg.index;
+ void *key = (void *)(uintptr_t)(reg.index + 1);
entry = _mesa_hash_table_search(ht, key);
assert(entry);
@@ -122,7 +122,7 @@ qir_lower_uniforms(struct vc4_compile *c)
struct hash_entry *entry;
hash_table_foreach(ht, entry) {
uint32_t count = (uintptr_t)entry->data;
- uint32_t index = (uintptr_t)entry->key;
+ uint32_t index = (uintptr_t)entry->key - 1;
if (count > max_count) {
max_count = count;
max_index = index;
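The +1 on store and -1 on load in this file exist because the hash table's pointer keys may not be NULL, and a uniform with reg.index == 0 would otherwise hash as a NULL key (an assumption based on the usual _mesa_hash_table constraint). The convention in one place:

#include <stdint.h>

/* Bias the index by one on store and unbias on load so that index 0
 * never becomes a NULL pointer key. */
static void *
index_to_key(uint32_t index)
{
        return (void *)(uintptr_t)(index + 1);
}

static uint32_t
key_to_index(const void *key)
{
        return (uint32_t)((uintptr_t)key - 1);
}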
diff --git a/src/gallium/drivers/vc4/vc4_qpu.h b/src/gallium/drivers/vc4/vc4_qpu.h
index c9ab6344589..fbb90ba12a0 100644
--- a/src/gallium/drivers/vc4/vc4_qpu.h
+++ b/src/gallium/drivers/vc4/vc4_qpu.h
@@ -122,23 +122,23 @@ static inline struct qpu_reg qpu_r3(void) { return qpu_rn(3); }
static inline struct qpu_reg qpu_r4(void) { return qpu_rn(4); }
static inline struct qpu_reg qpu_r5(void) { return qpu_rn(5); }
-uint64_t qpu_NOP(void);
-uint64_t qpu_a_MOV(struct qpu_reg dst, struct qpu_reg src);
-uint64_t qpu_m_MOV(struct qpu_reg dst, struct qpu_reg src);
+uint64_t qpu_NOP(void) ATTRIBUTE_CONST;
+uint64_t qpu_a_MOV(struct qpu_reg dst, struct qpu_reg src) ATTRIBUTE_CONST;
+uint64_t qpu_m_MOV(struct qpu_reg dst, struct qpu_reg src) ATTRIBUTE_CONST;
uint64_t qpu_a_alu2(enum qpu_op_add op, struct qpu_reg dst,
- struct qpu_reg src0, struct qpu_reg src1);
+ struct qpu_reg src0, struct qpu_reg src1) ATTRIBUTE_CONST;
uint64_t qpu_m_alu2(enum qpu_op_mul op, struct qpu_reg dst,
- struct qpu_reg src0, struct qpu_reg src1);
-uint64_t qpu_merge_inst(uint64_t a, uint64_t b);
-uint64_t qpu_load_imm_ui(struct qpu_reg dst, uint32_t val);
-uint64_t qpu_set_sig(uint64_t inst, uint32_t sig);
-uint64_t qpu_set_cond_add(uint64_t inst, uint32_t cond);
-uint64_t qpu_set_cond_mul(uint64_t inst, uint32_t cond);
-uint32_t qpu_encode_small_immediate(uint32_t i);
-
-bool qpu_waddr_is_tlb(uint32_t waddr);
-bool qpu_inst_is_tlb(uint64_t inst);
-int qpu_num_sf_accesses(uint64_t inst);
+ struct qpu_reg src0, struct qpu_reg src1) ATTRIBUTE_CONST;
+uint64_t qpu_merge_inst(uint64_t a, uint64_t b) ATTRIBUTE_CONST;
+uint64_t qpu_load_imm_ui(struct qpu_reg dst, uint32_t val) ATTRIBUTE_CONST;
+uint64_t qpu_set_sig(uint64_t inst, uint32_t sig) ATTRIBUTE_CONST;
+uint64_t qpu_set_cond_add(uint64_t inst, uint32_t cond) ATTRIBUTE_CONST;
+uint64_t qpu_set_cond_mul(uint64_t inst, uint32_t cond) ATTRIBUTE_CONST;
+uint32_t qpu_encode_small_immediate(uint32_t i) ATTRIBUTE_CONST;
+
+bool qpu_waddr_is_tlb(uint32_t waddr) ATTRIBUTE_CONST;
+bool qpu_inst_is_tlb(uint64_t inst) ATTRIBUTE_CONST;
+int qpu_num_sf_accesses(uint64_t inst) ATTRIBUTE_CONST;
void qpu_serialize_one_inst(struct vc4_compile *c, uint64_t inst);
static inline uint64_t
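ATTRIBUTE_CONST marks these helpers as pure functions of their arguments, letting the compiler merge or hoist repeated calls to the bit-packing routines. The wrapper is defined in a shared Mesa header outside this diff, so treat the exact spelling as an assumption; the usual definition is:

/* Sketch; not part of this patch. */
#ifdef __GNUC__
#define ATTRIBUTE_CONST __attribute__((const))
#else
#define ATTRIBUTE_CONST
#endif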
diff --git a/src/gallium/drivers/vc4/vc4_qpu_disasm.c b/src/gallium/drivers/vc4/vc4_qpu_disasm.c
index 55e0e6139b5..00aeb300a9b 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_disasm.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_disasm.c
@@ -225,7 +225,7 @@ static const char *qpu_condflags[] = {
};
#define DESC(array, index) \
- ((index > ARRAY_SIZE(array) || !(array)[index]) ? \
+ ((index >= ARRAY_SIZE(array) || !(array)[index]) ? \
"???" : (array)[index])
static const char *
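The DESC fix above is a classic off-by-one: an N-entry array has valid indices 0..N-1, so the guard must reject index == N as well. A self-contained illustration:

#include <stdio.h>

#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

static const char *names[] = { "zero", "one", "two" };

static const char *
desc(unsigned index)
{
        /* With '>' instead of '>=', index == 3 would read one past
         * the end of the array before the NULL check could help. */
        return (index >= ARRAY_SIZE(names) || !names[index]) ?
               "???" : names[index];
}

int
main(void)
{
        printf("%s %s\n", desc(1), desc(7));
        return 0;
}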
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 99afe4b8798..f324056258c 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -234,6 +234,7 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
case QFILE_VPM:
assert((int)qinst->src[i].index >=
last_vpm_read_index);
+ (void)last_vpm_read_index;
last_vpm_read_index = qinst->src[i].index;
src[i] = qpu_ra(QPU_R_VPM);
break;
@@ -319,7 +320,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
abort();
}
- queue(c, qpu_a_MOV(dst, qpu_r4()));
+ if (dst.mux != QPU_MUX_R4)
+ queue(c, qpu_a_MOV(dst, qpu_r4()));
break;
@@ -402,6 +404,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
*last_inst(c) = qpu_set_sig(*last_inst(c),
QPU_SIG_COLOR_LOAD);
+ if (dst.mux != QPU_MUX_R4)
+ queue(c, qpu_a_MOV(dst, qpu_r4()));
break;
case QOP_TLB_COLOR_WRITE:
@@ -451,21 +455,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
queue(c, qpu_NOP());
*last_inst(c) = qpu_set_sig(*last_inst(c),
QPU_SIG_LOAD_TMU0);
-
- break;
-
- case QOP_R4_UNPACK_A:
- case QOP_R4_UNPACK_B:
- case QOP_R4_UNPACK_C:
- case QOP_R4_UNPACK_D:
- assert(src[0].mux == QPU_MUX_R4);
- queue(c, qpu_a_MOV(dst, src[0]));
- *last_inst(c) |= QPU_PM;
- *last_inst(c) |= QPU_SET_FIELD(QPU_UNPACK_8A +
- (qinst->op -
- QOP_R4_UNPACK_A),
- QPU_UNPACK);
-
+ if (dst.mux != QPU_MUX_R4)
+ queue(c, qpu_a_MOV(dst, qpu_r4()));
break;
case QOP_UNPACK_8A_F:
@@ -474,20 +465,30 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
case QOP_UNPACK_8D_F:
case QOP_UNPACK_16A_F:
case QOP_UNPACK_16B_F: {
- assert(src[0].mux == QPU_MUX_A);
-
- /* Since we're setting the pack bits, if the
- * destination is in A it would get re-packed.
- */
- queue(c, qpu_a_FMAX((dst.mux == QPU_MUX_A ?
- qpu_rb(31) : dst),
- src[0], src[0]));
- *last_inst(c) |= QPU_SET_FIELD(unpack_map[qinst->op -
- QOP_UNPACK_8A_F],
- QPU_UNPACK);
+ if (src[0].mux == QPU_MUX_R4) {
+ queue(c, qpu_a_MOV(dst, src[0]));
+ *last_inst(c) |= QPU_PM;
+ *last_inst(c) |= QPU_SET_FIELD(QPU_UNPACK_8A +
+ (qinst->op -
+ QOP_UNPACK_8A_F),
+ QPU_UNPACK);
+ } else {
+ assert(src[0].mux == QPU_MUX_A);
- if (dst.mux == QPU_MUX_A) {
- queue(c, qpu_a_MOV(dst, qpu_rb(31)));
+ /* Since we're setting the pack bits, if the
+ * destination is in A it would get re-packed.
+ */
+ queue(c, qpu_a_FMAX((dst.mux == QPU_MUX_A ?
+ qpu_rb(31) : dst),
+ src[0], src[0]));
+ *last_inst(c) |=
+ QPU_SET_FIELD(unpack_map[qinst->op -
+ QOP_UNPACK_8A_F],
+ QPU_UNPACK);
+
+ if (dst.mux == QPU_MUX_A) {
+ queue(c, qpu_a_MOV(dst, qpu_rb(31)));
+ }
}
}
break;
diff --git a/src/gallium/drivers/vc4/vc4_qpu_validate.c b/src/gallium/drivers/vc4/vc4_qpu_validate.c
index 8471edbf62c..9cf6841f41c 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_validate.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_validate.c
@@ -23,6 +23,13 @@
#include "vc4_qpu.h"
+#ifdef NDEBUG
+/* Since most of our code is used in assert()s, don't warn about dead code. */
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wunused-function"
+#endif
+
static bool
writes_reg(uint64_t inst, uint32_t w)
{
diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c
index 3b0b890b66a..a29db1f3abe 100644
--- a/src/gallium/drivers/vc4/vc4_register_allocate.c
+++ b/src/gallium/drivers/vc4/vc4_register_allocate.c
@@ -116,6 +116,8 @@ vc4_alloc_reg_set(struct vc4_context *vc4)
vc4->regs = ra_alloc_reg_set(vc4, ARRAY_SIZE(vc4_regs));
vc4->reg_class_any = ra_alloc_reg_class(vc4->regs);
+ vc4->reg_class_r4_or_a = ra_alloc_reg_class(vc4->regs);
+ vc4->reg_class_a = ra_alloc_reg_class(vc4->regs);
for (uint32_t i = 0; i < ARRAY_SIZE(vc4_regs); i++) {
/* Reserve ra31/rb31 for spilling fixup_raddr_conflict() in
* vc4_qpu_emit.c
@@ -126,15 +128,18 @@ vc4_alloc_reg_set(struct vc4_context *vc4)
/* R4 can't be written as a general purpose register (it's
* TMU_NOSWAP as a write address).
*/
- if (vc4_regs[i].mux == QPU_MUX_R4)
+ if (vc4_regs[i].mux == QPU_MUX_R4) {
+ ra_class_add_reg(vc4->regs, vc4->reg_class_r4_or_a, i);
continue;
+ }
ra_class_add_reg(vc4->regs, vc4->reg_class_any, i);
}
- vc4->reg_class_a = ra_alloc_reg_class(vc4->regs);
- for (uint32_t i = AB_INDEX; i < AB_INDEX + 64; i += 2)
+ for (uint32_t i = AB_INDEX; i < AB_INDEX + 64; i += 2) {
ra_class_add_reg(vc4->regs, vc4->reg_class_a, i);
+ ra_class_add_reg(vc4->regs, vc4->reg_class_r4_or_a, i);
+ }
ra_set_finalize(vc4->regs, NULL);
}
@@ -153,6 +158,10 @@ node_to_temp_priority(const void *in_a, const void *in_b)
return a->priority - b->priority;
}
+#define CLASS_BIT_A (1 << 0)
+#define CLASS_BIT_B_OR_ACC (1 << 1)
+#define CLASS_BIT_R4 (1 << 2)
+
/**
* Returns a mapping from QFILE_TEMP indices to struct qpu_regs.
*
@@ -165,6 +174,7 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
uint32_t temp_to_node[c->num_temps];
uint32_t def[c->num_temps];
uint32_t use[c->num_temps];
+ uint8_t class_bits[c->num_temps];
struct qpu_reg *temp_registers = calloc(c->num_temps,
sizeof(*temp_registers));
memset(def, 0, sizeof(def));
@@ -181,10 +191,6 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
struct ra_graph *g = ra_alloc_interference_graph(vc4->regs,
c->num_temps);
- for (uint32_t i = 0; i < c->num_temps; i++) {
- ra_set_node_class(g, i, vc4->reg_class_any);
- }
-
/* Compute the live ranges so we can figure out interference.
*/
uint32_t ip = 0;
@@ -223,8 +229,33 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
temp_to_node[map[i].temp] = i;
}
- /* Figure out our register classes and preallocated registers*/
+ /* Figure out our register classes and preallocated registers. We
+ * start with any temp being able to be in any file, then instructions
+ * incrementally remove bits that the temp definitely can't be in.
+ */
+ memset(class_bits,
+ CLASS_BIT_A | CLASS_BIT_B_OR_ACC | CLASS_BIT_R4,
+ sizeof(class_bits));
+
+ ip = 0;
list_for_each_entry(struct qinst, inst, &c->instructions, link) {
+ if (qir_writes_r4(inst)) {
+ /* This instruction writes r4 (and optionally moves
+ * its result to a temp), so nothing else can be
+ * stored in r4 across it.
+ */
+ for (int i = 0; i < c->num_temps; i++) {
+ if (def[i] < ip && use[i] > ip)
+ class_bits[i] &= ~CLASS_BIT_R4;
+ }
+ } else {
+ /* R4 can't be written as a general purpose
+ * register (it's TMU_NOSWAP as a write address).
+ */
+ if (inst->dst.file == QFILE_TEMP)
+ class_bits[inst->dst.index] &= ~CLASS_BIT_R4;
+ }
+
switch (inst->op) {
case QOP_FRAG_Z:
ra_set_node_reg(g, temp_to_node[inst->dst.index],
@@ -236,17 +267,9 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
AB_INDEX + QPU_R_FRAG_PAYLOAD_ZW * 2);
break;
- case QOP_TEX_RESULT:
- case QOP_TLB_COLOR_READ:
- assert(vc4_regs[ACC_INDEX + 4].mux == QPU_MUX_R4);
- ra_set_node_reg(g, temp_to_node[inst->dst.index],
- ACC_INDEX + 4);
- break;
-
case QOP_PACK_SCALED:
/* The pack flags require an A-file dst register. */
- ra_set_node_class(g, temp_to_node[inst->dst.index],
- vc4->reg_class_a);
+ class_bits[inst->dst.index] &= CLASS_BIT_A;
break;
default:
@@ -254,8 +277,30 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
}
if (qir_src_needs_a_file(inst)) {
- ra_set_node_class(g, temp_to_node[inst->src[0].index],
- vc4->reg_class_a);
+ class_bits[inst->src[0].index] &= CLASS_BIT_A;
+ }
+ ip++;
+ }
+
+ for (uint32_t i = 0; i < c->num_temps; i++) {
+ int node = temp_to_node[i];
+
+ switch (class_bits[i]) {
+ case CLASS_BIT_A | CLASS_BIT_B_OR_ACC | CLASS_BIT_R4:
+ case CLASS_BIT_A | CLASS_BIT_B_OR_ACC:
+ ra_set_node_class(g, node, vc4->reg_class_any);
+ break;
+ case CLASS_BIT_A | CLASS_BIT_R4:
+ ra_set_node_class(g, node, vc4->reg_class_r4_or_a);
+ break;
+ case CLASS_BIT_A:
+ ra_set_node_class(g, node, vc4->reg_class_a);
+ break;
+ default:
+ fprintf(stderr, "temp %d: bad class bits: 0x%x\n",
+ i, class_bits[i]);
+ abort();
+ break;
}
}
@@ -270,7 +315,11 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
}
bool ok = ra_allocate(g);
- assert(ok);
+ if (!ok) {
+ fprintf(stderr, "Failed to register allocate:\n");
+ qir_dump(c);
+ abort();
+ }
for (uint32_t i = 0; i < c->num_temps; i++) {
temp_registers[i] = vc4_regs[ra_get_node_reg(g, temp_to_node[i])];
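The key predicate in the new class-bits pass is the liveness test def[i] < ip && use[i] > ip: a temp loses its R4 class bit only when it is live across an instruction that clobbers r4. As a standalone helper:

#include <stdbool.h>
#include <stdint.h>

/* A temp defined before instruction ip and used after it is live
 * across ip, so it can't occupy r4 if the instruction at ip writes
 * r4. */
static bool
live_across(uint32_t def, uint32_t use, uint32_t ip)
{
        return def < ip && use > ip;
}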
diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c
index cab76406055..5d5166fd818 100644
--- a/src/gallium/drivers/vc4/vc4_resource.c
+++ b/src/gallium/drivers/vc4/vc4_resource.c
@@ -102,6 +102,12 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
vc4_resource_bo_alloc(rsc);
+
+ /* If it might be bound as one of our vertex buffers, make
+ * sure we re-emit vertex buffer state.
+ */
+ if (prsc->bind & PIPE_BIND_VERTEX_BUFFER)
+ vc4->dirty |= VC4_DIRTY_VTXBUF;
} else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
if (vc4_cl_references_bo(pctx, rsc->bo)) {
if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
@@ -110,6 +116,8 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
prsc->height0 == box->height &&
prsc->depth0 == box->depth) {
vc4_resource_bo_alloc(rsc);
+ if (prsc->bind & PIPE_BIND_VERTEX_BUFFER)
+ vc4->dirty |= VC4_DIRTY_VTXBUF;
} else {
vc4_flush(pctx);
}
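Both vertex-buffer hunks above enforce the same invariant: vc4_resource_bo_alloc() replaces the resource's underlying BO, so any command-list state that captured the old BO's address must be re-emitted. As a sketch, using the names from this patch (the helper itself is hypothetical, not in the diff):

/* Assumed helper: after swapping a resource's BO, flag any state that
 * may have baked in the old BO's address. */
static void
vc4_dirty_after_bo_realloc(struct vc4_context *vc4,
                           struct pipe_resource *prsc)
{
        if (prsc->bind & PIPE_BIND_VERTEX_BUFFER)
                vc4->dirty |= VC4_DIRTY_VTXBUF;
}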
diff --git a/src/gallium/drivers/vc4/vc4_resource.h b/src/gallium/drivers/vc4/vc4_resource.h
index ab8f5d3cd55..87571b75e8b 100644
--- a/src/gallium/drivers/vc4/vc4_resource.h
+++ b/src/gallium/drivers/vc4/vc4_resource.h
@@ -82,19 +82,19 @@ struct vc4_resource {
struct pipe_resource *shadow_parent;
};
-static INLINE struct vc4_resource *
+static inline struct vc4_resource *
vc4_resource(struct pipe_resource *prsc)
{
return (struct vc4_resource *)prsc;
}
-static INLINE struct vc4_surface *
+static inline struct vc4_surface *
vc4_surface(struct pipe_surface *psurf)
{
return (struct vc4_surface *)psurf;
}
-static INLINE struct vc4_transfer *
+static inline struct vc4_transfer *
vc4_transfer(struct pipe_transfer *ptrans)
{
return (struct vc4_transfer *)ptrans;
diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c
index f63bead0fbb..2dee1d40e5f 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -176,6 +176,10 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
return 0;
/* Stream output. */
@@ -489,6 +493,12 @@ vc4_screen_bo_get_handle(struct pipe_screen *pscreen,
{
whandle->stride = stride;
+ /* If we're passing some reference to our BO out to some other part of
+ * the system, then we can't apply any optimizations that assume
+ * we're the only ones seeing it (like BO caching or shadow update
+ * avoidance).
+ */
+ bo->private = false;
+
switch (whandle->type) {
case DRM_API_HANDLE_TYPE_SHARED:
return vc4_bo_flink(bo, &whandle->handle);
diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c
index b58013dd2ee..7cfd236349d 100644
--- a/src/gallium/drivers/vc4/vc4_simulator.c
+++ b/src/gallium/drivers/vc4/vc4_simulator.c
@@ -74,11 +74,12 @@ vc4_simulator_pin_bos(struct drm_device *dev, struct vc4_exec_info *exec)
struct vc4_bo **bos = vc4->bo_pointers.base;
exec->bo_count = args->bo_handle_count;
- exec->bo = calloc(exec->bo_count, sizeof(struct vc4_bo_exec_state));
+ exec->bo = calloc(exec->bo_count, sizeof(void *));
for (int i = 0; i < exec->bo_count; i++) {
struct vc4_bo *bo = bos[i];
struct drm_gem_cma_object *obj = vc4_wrap_bo_with_cma(dev, bo);
+ struct drm_vc4_bo *drm_bo = to_vc4_bo(&obj->base);
#if 0
fprintf(stderr, "bo hindex %d: %s\n", i, bo->name);
#endif
@@ -86,7 +87,16 @@ vc4_simulator_pin_bos(struct drm_device *dev, struct vc4_exec_info *exec)
vc4_bo_map(bo);
memcpy(obj->vaddr, bo->map, bo->size);
- exec->bo[i].bo = obj;
+ exec->bo[i] = obj;
+
+ /* The kernel does this validation at shader create ioctl
+ * time.
+ */
+ if (strcmp(bo->name, "code") == 0) {
+ drm_bo->validated_shader = vc4_validate_shader(obj);
+ if (!drm_bo->validated_shader)
+ abort();
+ }
}
return 0;
}
@@ -95,7 +105,7 @@ static int
vc4_simulator_unpin_bos(struct vc4_exec_info *exec)
{
for (int i = 0; i < exec->bo_count; i++) {
- struct drm_gem_cma_object *obj = exec->bo[i].bo;
+ struct drm_gem_cma_object *obj = exec->bo[i];
struct vc4_bo *bo = to_vc4_bo(&obj->base)->bo;
memcpy(bo->map, obj->vaddr, bo->size);
diff --git a/src/gallium/drivers/vc4/vc4_simulator_validate.h b/src/gallium/drivers/vc4/vc4_simulator_validate.h
index 2bb36b253bb..68ace0216aa 100644
--- a/src/gallium/drivers/vc4/vc4_simulator_validate.h
+++ b/src/gallium/drivers/vc4/vc4_simulator_validate.h
@@ -78,6 +78,7 @@ struct drm_gem_cma_object {
struct drm_vc4_bo {
struct drm_gem_cma_object base;
struct vc4_bo *bo;
+ struct vc4_validated_shader_info *validated_shader;
struct list_head unref_head;
};
diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c
index 4a1d4c3a4d6..8a759c2ca4c 100644
--- a/src/gallium/drivers/vc4/vc4_state.c
+++ b/src/gallium/drivers/vc4/vc4_state.c
@@ -107,7 +107,7 @@ vc4_create_rasterizer_state(struct pipe_context *pctx,
/* Workaround: HW-2726 PTB does not handle zero-size points (BCM2835,
* BCM21553).
*/
- so->point_size = MAX2(cso->point_size, .125);
+ so->point_size = MAX2(cso->point_size, .125f);
if (cso->front_ccw)
so->config_bits[0] |= VC4_CONFIG_BITS_CW_PRIMITIVES;
@@ -461,11 +461,64 @@ vc4_get_stage_tex(struct vc4_context *vc4, unsigned shader)
}
}
+static uint32_t translate_wrap(uint32_t p_wrap, bool using_nearest)
+{
+ switch (p_wrap) {
+ case PIPE_TEX_WRAP_REPEAT:
+ return 0;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ return 1;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ return 2;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ return 3;
+ case PIPE_TEX_WRAP_CLAMP:
+ return (using_nearest ? 1 : 3);
+ default:
+ fprintf(stderr, "Unknown wrap mode %d\n", p_wrap);
+ assert(!"not reached");
+ return 0;
+ }
+}
+
static void *
vc4_create_sampler_state(struct pipe_context *pctx,
const struct pipe_sampler_state *cso)
{
- return vc4_generic_cso_state_create(cso, sizeof(*cso));
+ static const uint8_t minfilter_map[6] = {
+ VC4_TEX_P1_MINFILT_NEAR_MIP_NEAR,
+ VC4_TEX_P1_MINFILT_LIN_MIP_NEAR,
+ VC4_TEX_P1_MINFILT_NEAR_MIP_LIN,
+ VC4_TEX_P1_MINFILT_LIN_MIP_LIN,
+ VC4_TEX_P1_MINFILT_NEAREST,
+ VC4_TEX_P1_MINFILT_LINEAR,
+ };
+ static const uint32_t magfilter_map[] = {
+ [PIPE_TEX_FILTER_NEAREST] = VC4_TEX_P1_MAGFILT_NEAREST,
+ [PIPE_TEX_FILTER_LINEAR] = VC4_TEX_P1_MAGFILT_LINEAR,
+ };
+ bool either_nearest =
+ (cso->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST ||
+ cso->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST);
+ struct vc4_sampler_state *so = CALLOC_STRUCT(vc4_sampler_state);
+
+ if (!so)
+ return NULL;
+
+ memcpy(so, cso, sizeof(*cso));
+
+ so->texture_p1 =
+ (VC4_SET_FIELD(magfilter_map[cso->mag_img_filter],
+ VC4_TEX_P1_MAGFILT) |
+ VC4_SET_FIELD(minfilter_map[cso->min_mip_filter * 2 +
+ cso->min_img_filter],
+ VC4_TEX_P1_MINFILT) |
+ VC4_SET_FIELD(translate_wrap(cso->wrap_s, either_nearest),
+ VC4_TEX_P1_WRAP_S) |
+ VC4_SET_FIELD(translate_wrap(cso->wrap_t, either_nearest),
+ VC4_TEX_P1_WRAP_T));
+
+ return so;
}
static void
@@ -499,13 +552,13 @@ static struct pipe_sampler_view *
vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
const struct pipe_sampler_view *cso)
{
- struct pipe_sampler_view *so = malloc(sizeof(*so));
+ struct vc4_sampler_view *so = malloc(sizeof(*so));
struct vc4_resource *rsc = vc4_resource(prsc);
if (!so)
return NULL;
- *so = *cso;
+ so->base = *cso;
pipe_reference(NULL, &prsc->reference);
@@ -516,18 +569,19 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
* Also, Raspberry Pi doesn't support sampling from raster textures,
* so we also have to copy to a temporary in that case.
*/
- if (so->u.tex.first_level ||
+ if (cso->u.tex.first_level ||
rsc->vc4_format == VC4_TEXTURE_TYPE_RGBA32R) {
struct vc4_resource *shadow_parent = vc4_resource(prsc);
struct pipe_resource tmpl = shadow_parent->base.b;
struct vc4_resource *clone;
tmpl.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
- tmpl.width0 = u_minify(tmpl.width0, so->u.tex.first_level);
- tmpl.height0 = u_minify(tmpl.height0, so->u.tex.first_level);
- tmpl.last_level = so->u.tex.last_level - so->u.tex.first_level;
+ tmpl.width0 = u_minify(tmpl.width0, cso->u.tex.first_level);
+ tmpl.height0 = u_minify(tmpl.height0, cso->u.tex.first_level);
+ tmpl.last_level = cso->u.tex.last_level - cso->u.tex.first_level;
prsc = vc4_resource_create(pctx->screen, &tmpl);
+ rsc = vc4_resource(prsc);
clone = vc4_resource(prsc);
clone->shadow_parent = &shadow_parent->base.b;
/* Flag it as needing update of the contents from the parent. */
@@ -535,11 +589,23 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
assert(clone->vc4_format != VC4_TEXTURE_TYPE_RGBA32R);
}
- so->texture = prsc;
- so->reference.count = 1;
- so->context = pctx;
-
- return so;
+ so->base.texture = prsc;
+ so->base.reference.count = 1;
+ so->base.context = pctx;
+
+ so->texture_p0 =
+ (VC4_SET_FIELD(rsc->slices[0].offset >> 12, VC4_TEX_P0_OFFSET) |
+ VC4_SET_FIELD(rsc->vc4_format & 15, VC4_TEX_P0_TYPE) |
+ VC4_SET_FIELD(cso->u.tex.last_level -
+ cso->u.tex.first_level, VC4_TEX_P0_MIPLVLS) |
+ VC4_SET_FIELD(cso->target == PIPE_TEXTURE_CUBE,
+ VC4_TEX_P0_CMMODE));
+ so->texture_p1 =
+ (VC4_SET_FIELD(rsc->vc4_format >> 4, VC4_TEX_P1_TYPE4) |
+ VC4_SET_FIELD(prsc->height0 & 2047, VC4_TEX_P1_HEIGHT) |
+ VC4_SET_FIELD(prsc->width0 & 2047, VC4_TEX_P1_WIDTH));
+
+ return &so->base;
}
static void
diff --git a/src/gallium/drivers/vc4/vc4_tiling.c b/src/gallium/drivers/vc4/vc4_tiling.c
index f9801c9cefd..cf86eb0fa31 100644
--- a/src/gallium/drivers/vc4/vc4_tiling.c
+++ b/src/gallium/drivers/vc4/vc4_tiling.c
@@ -127,13 +127,10 @@ vc4_store_utile(void *dst, void *src, uint32_t src_stride, uint32_t cpp)
static void
check_box_utile_alignment(const struct pipe_box *box, int cpp)
{
- uint32_t utile_w = vc4_utile_width(cpp);
- uint32_t utile_h = vc4_utile_height(cpp);
-
- assert(!(box->x & (utile_w - 1)));
- assert(!(box->y & (utile_h - 1)));
- assert(!(box->width & (utile_w - 1)));
- assert(!(box->height & (utile_h - 1)));
+ assert(!(box->x & (vc4_utile_width(cpp) - 1)));
+ assert(!(box->y & (vc4_utile_height(cpp) - 1)));
+ assert(!(box->width & (vc4_utile_width(cpp) - 1)));
+ assert(!(box->height & (vc4_utile_height(cpp) - 1)));
}
static void
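The asserts above use the standard power-of-two alignment idiom: x & (n - 1) is zero exactly when x is a multiple of n, provided n is a power of two (which utile widths and heights are). As a self-contained check:

#include <stdbool.h>
#include <stdint.h>

/* n must be a power of two, as vc4_utile_width()/height() return. */
static bool
is_aligned(uint32_t x, uint32_t n)
{
        return (x & (n - 1)) == 0;
}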
diff --git a/src/gallium/drivers/vc4/vc4_tiling.h b/src/gallium/drivers/vc4/vc4_tiling.h
index b5d10da3417..b90bba70200 100644
--- a/src/gallium/drivers/vc4/vc4_tiling.h
+++ b/src/gallium/drivers/vc4/vc4_tiling.h
@@ -24,9 +24,9 @@
#ifndef VC4_TILING_H
#define VC4_TILING_H
-uint32_t vc4_utile_width(int cpp);
-uint32_t vc4_utile_height(int cpp);
-bool vc4_size_is_lt(uint32_t width, uint32_t height, int cpp);
+uint32_t vc4_utile_width(int cpp) ATTRIBUTE_CONST;
+uint32_t vc4_utile_height(int cpp) ATTRIBUTE_CONST;
+bool vc4_size_is_lt(uint32_t width, uint32_t height, int cpp) ATTRIBUTE_CONST;
void vc4_load_utile(void *dst, void *src, uint32_t dst_stride, uint32_t cpp);
void vc4_store_utile(void *dst, void *src, uint32_t src_stride, uint32_t cpp);
void vc4_load_tiled_image(void *dst, uint32_t dst_stride,
diff --git a/src/gallium/drivers/vc4/vc4_uniforms.c b/src/gallium/drivers/vc4/vc4_uniforms.c
new file mode 100644
index 00000000000..85d6998205e
--- /dev/null
+++ b/src/gallium/drivers/vc4/vc4_uniforms.c
@@ -0,0 +1,344 @@
+/*
+ * Copyright © 2014-2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_pack_color.h"
+#include "util/format_srgb.h"
+
+#include "vc4_context.h"
+#include "vc4_qir.h"
+
+static void
+write_texture_p0(struct vc4_context *vc4,
+ struct vc4_cl_out **uniforms,
+ struct vc4_texture_stateobj *texstate,
+ uint32_t unit)
+{
+ struct vc4_sampler_view *sview =
+ vc4_sampler_view(texstate->textures[unit]);
+ struct vc4_resource *rsc = vc4_resource(sview->base.texture);
+
+ cl_reloc(vc4, &vc4->uniforms, uniforms, rsc->bo, sview->texture_p0);
+}
+
+static void
+write_texture_p1(struct vc4_context *vc4,
+ struct vc4_cl_out **uniforms,
+ struct vc4_texture_stateobj *texstate,
+ uint32_t unit)
+{
+ struct vc4_sampler_view *sview =
+ vc4_sampler_view(texstate->textures[unit]);
+ struct vc4_sampler_state *sampler =
+ vc4_sampler_state(texstate->samplers[unit]);
+
+ cl_aligned_u32(uniforms, sview->texture_p1 | sampler->texture_p1);
+}
+
+static void
+write_texture_p2(struct vc4_context *vc4,
+ struct vc4_cl_out **uniforms,
+ struct vc4_texture_stateobj *texstate,
+ uint32_t data)
+{
+ uint32_t unit = data & 0xffff;
+ struct pipe_sampler_view *texture = texstate->textures[unit];
+ struct vc4_resource *rsc = vc4_resource(texture->texture);
+
+ cl_aligned_u32(uniforms,
+ VC4_SET_FIELD(VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE,
+ VC4_TEX_P2_PTYPE) |
+ VC4_SET_FIELD(rsc->cube_map_stride >> 12, VC4_TEX_P2_CMST) |
+ VC4_SET_FIELD((data >> 16) & 1, VC4_TEX_P2_BSLOD));
+}
+
+
+#define SWIZ(x,y,z,w) { \
+ UTIL_FORMAT_SWIZZLE_##x, \
+ UTIL_FORMAT_SWIZZLE_##y, \
+ UTIL_FORMAT_SWIZZLE_##z, \
+ UTIL_FORMAT_SWIZZLE_##w \
+}
+
+static void
+write_texture_border_color(struct vc4_context *vc4,
+ struct vc4_cl_out **uniforms,
+ struct vc4_texture_stateobj *texstate,
+ uint32_t unit)
+{
+ struct pipe_sampler_state *sampler = texstate->samplers[unit];
+ struct pipe_sampler_view *texture = texstate->textures[unit];
+ struct vc4_resource *rsc = vc4_resource(texture->texture);
+ union util_color uc;
+
+ const struct util_format_description *tex_format_desc =
+ util_format_description(texture->format);
+
+ float border_color[4];
+ for (int i = 0; i < 4; i++)
+ border_color[i] = sampler->border_color.f[i];
+ if (util_format_is_srgb(texture->format)) {
+ for (int i = 0; i < 3; i++)
+ border_color[i] =
+ util_format_linear_to_srgb_float(border_color[i]);
+ }
+
+ /* Turn the border color into the layout of channels that it would
+ * have when stored as texture contents.
+ */
+ float storage_color[4];
+ util_format_unswizzle_4f(storage_color,
+ border_color,
+ tex_format_desc->swizzle);
+
+ /* Now, pack so that when the vc4_format-sampled texture contents are
+ * replaced with our border color, the vc4_get_format_swizzle()
+ * swizzling will get the right channels.
+ */
+ if (util_format_is_depth_or_stencil(texture->format)) {
+ uc.ui[0] = util_pack_z(PIPE_FORMAT_Z24X8_UNORM,
+ sampler->border_color.f[0]) << 8;
+ } else {
+ switch (rsc->vc4_format) {
+ default:
+ case VC4_TEXTURE_TYPE_RGBA8888:
+ util_pack_color(storage_color,
+ PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
+ break;
+ case VC4_TEXTURE_TYPE_RGBA4444:
+ util_pack_color(storage_color,
+ PIPE_FORMAT_A8B8G8R8_UNORM, &uc);
+ break;
+ case VC4_TEXTURE_TYPE_RGB565:
+ util_pack_color(storage_color,
+ PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
+ break;
+ case VC4_TEXTURE_TYPE_ALPHA:
+ uc.ui[0] = float_to_ubyte(storage_color[0]) << 24;
+ break;
+ case VC4_TEXTURE_TYPE_LUMALPHA:
+ uc.ui[0] = ((float_to_ubyte(storage_color[1]) << 24) |
+ (float_to_ubyte(storage_color[0]) << 0));
+ break;
+ }
+ }
+
+ cl_aligned_u32(uniforms, uc.ui[0]);
+}
+
+static uint32_t
+get_texrect_scale(struct vc4_texture_stateobj *texstate,
+ enum quniform_contents contents,
+ uint32_t data)
+{
+ struct pipe_sampler_view *texture = texstate->textures[data];
+ uint32_t dim;
+
+ if (contents == QUNIFORM_TEXRECT_SCALE_X)
+ dim = texture->texture->width0;
+ else
+ dim = texture->texture->height0;
+
+ return fui(1.0f / dim);
+}
+
+static struct vc4_bo *
+vc4_upload_ubo(struct vc4_context *vc4,
+ struct vc4_compiled_shader *shader,
+ const uint32_t *gallium_uniforms)
+{
+ if (!shader->ubo_size)
+ return NULL;
+
+ struct vc4_bo *ubo = vc4_bo_alloc(vc4->screen, shader->ubo_size, "ubo");
+ uint32_t *data = vc4_bo_map(ubo);
+ for (uint32_t i = 0; i < shader->num_ubo_ranges; i++) {
+ memcpy(data + shader->ubo_ranges[i].dst_offset,
+ gallium_uniforms + shader->ubo_ranges[i].src_offset,
+ shader->ubo_ranges[i].size);
+ }
+
+ return ubo;
+}
+
+void
+vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
+ struct vc4_constbuf_stateobj *cb,
+ struct vc4_texture_stateobj *texstate)
+{
+ struct vc4_shader_uniform_info *uinfo = &shader->uniforms;
+ const uint32_t *gallium_uniforms = cb->cb[0].user_buffer;
+ struct vc4_bo *ubo = vc4_upload_ubo(vc4, shader, gallium_uniforms);
+
+ cl_ensure_space(&vc4->uniforms, (uinfo->count +
+ uinfo->num_texture_samples) * 4);
+
+ struct vc4_cl_out *uniforms =
+ cl_start_shader_reloc(&vc4->uniforms,
+ uinfo->num_texture_samples);
+
+ for (int i = 0; i < uinfo->count; i++) {
+
+ switch (uinfo->contents[i]) {
+ case QUNIFORM_CONSTANT:
+ cl_aligned_u32(&uniforms, uinfo->data[i]);
+ break;
+ case QUNIFORM_UNIFORM:
+ cl_aligned_u32(&uniforms,
+ gallium_uniforms[uinfo->data[i]]);
+ break;
+ case QUNIFORM_VIEWPORT_X_SCALE:
+ cl_aligned_f(&uniforms, vc4->viewport.scale[0] * 16.0f);
+ break;
+ case QUNIFORM_VIEWPORT_Y_SCALE:
+ cl_aligned_f(&uniforms, vc4->viewport.scale[1] * 16.0f);
+ break;
+
+ case QUNIFORM_VIEWPORT_Z_OFFSET:
+ cl_aligned_f(&uniforms, vc4->viewport.translate[2]);
+ break;
+ case QUNIFORM_VIEWPORT_Z_SCALE:
+ cl_aligned_f(&uniforms, vc4->viewport.scale[2]);
+ break;
+
+ case QUNIFORM_USER_CLIP_PLANE:
+ cl_aligned_f(&uniforms,
+ vc4->clip.ucp[uinfo->data[i] / 4][uinfo->data[i] % 4]);
+ break;
+
+ case QUNIFORM_TEXTURE_CONFIG_P0:
+ write_texture_p0(vc4, &uniforms, texstate,
+ uinfo->data[i]);
+ break;
+
+ case QUNIFORM_TEXTURE_CONFIG_P1:
+ write_texture_p1(vc4, &uniforms, texstate,
+ uinfo->data[i]);
+ break;
+
+ case QUNIFORM_TEXTURE_CONFIG_P2:
+ write_texture_p2(vc4, &uniforms, texstate,
+ uinfo->data[i]);
+ break;
+
+ case QUNIFORM_UBO_ADDR:
+ cl_aligned_reloc(vc4, &vc4->uniforms, &uniforms, ubo, 0);
+ break;
+
+ case QUNIFORM_TEXTURE_BORDER_COLOR:
+ write_texture_border_color(vc4, &uniforms,
+ texstate, uinfo->data[i]);
+ break;
+
+ case QUNIFORM_TEXRECT_SCALE_X:
+ case QUNIFORM_TEXRECT_SCALE_Y:
+ cl_aligned_u32(&uniforms,
+ get_texrect_scale(texstate,
+ uinfo->contents[i],
+ uinfo->data[i]));
+ break;
+
+ case QUNIFORM_BLEND_CONST_COLOR_X:
+ case QUNIFORM_BLEND_CONST_COLOR_Y:
+ case QUNIFORM_BLEND_CONST_COLOR_Z:
+ case QUNIFORM_BLEND_CONST_COLOR_W:
+ cl_aligned_f(&uniforms,
+ CLAMP(vc4->blend_color.color[uinfo->contents[i] -
+ QUNIFORM_BLEND_CONST_COLOR_X],
+ 0, 1));
+ break;
+
+ case QUNIFORM_STENCIL:
+ cl_aligned_u32(&uniforms,
+ vc4->zsa->stencil_uniforms[uinfo->data[i]] |
+ (uinfo->data[i] <= 1 ?
+ (vc4->stencil_ref.ref_value[uinfo->data[i]] << 8) :
+ 0));
+ break;
+
+ case QUNIFORM_ALPHA_REF:
+ cl_aligned_f(&uniforms,
+ vc4->zsa->base.alpha.ref_value);
+ break;
+ }
+#if 0
+ uint32_t written_val = *((uint32_t *)uniforms - 1);
+ fprintf(stderr, "%p: %d / 0x%08x (%f)\n",
+ shader, i, written_val, uif(written_val));
+#endif
+ }
+
+ cl_end(&vc4->uniforms, uniforms);
+
+ vc4_bo_unreference(&ubo);
+}
+
+void
+vc4_set_shader_uniform_dirty_flags(struct vc4_compiled_shader *shader)
+{
+ uint32_t dirty = 0;
+
+ for (int i = 0; i < shader->uniforms.count; i++) {
+ switch (shader->uniforms.contents[i]) {
+ case QUNIFORM_CONSTANT:
+ break;
+ case QUNIFORM_UNIFORM:
+ case QUNIFORM_UBO_ADDR:
+ dirty |= VC4_DIRTY_CONSTBUF;
+ break;
+
+ case QUNIFORM_VIEWPORT_X_SCALE:
+ case QUNIFORM_VIEWPORT_Y_SCALE:
+ case QUNIFORM_VIEWPORT_Z_OFFSET:
+ case QUNIFORM_VIEWPORT_Z_SCALE:
+ dirty |= VC4_DIRTY_VIEWPORT;
+ break;
+
+ case QUNIFORM_USER_CLIP_PLANE:
+ dirty |= VC4_DIRTY_CLIP;
+ break;
+
+ case QUNIFORM_TEXTURE_CONFIG_P0:
+ case QUNIFORM_TEXTURE_CONFIG_P1:
+ case QUNIFORM_TEXTURE_CONFIG_P2:
+ case QUNIFORM_TEXTURE_BORDER_COLOR:
+ case QUNIFORM_TEXRECT_SCALE_X:
+ case QUNIFORM_TEXRECT_SCALE_Y:
+ dirty |= VC4_DIRTY_TEXSTATE;
+ break;
+
+ case QUNIFORM_BLEND_CONST_COLOR_X:
+ case QUNIFORM_BLEND_CONST_COLOR_Y:
+ case QUNIFORM_BLEND_CONST_COLOR_Z:
+ case QUNIFORM_BLEND_CONST_COLOR_W:
+ dirty |= VC4_DIRTY_BLEND_COLOR;
+ break;
+
+ case QUNIFORM_STENCIL:
+ case QUNIFORM_ALPHA_REF:
+ dirty |= VC4_DIRTY_ZSA;
+ break;
+ }
+ }
+
+ shader->uniform_dirty_bits = dirty;
+}
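vc4_set_shader_uniform_dirty_flags() condenses each compiled shader's uniform stream into a single dirty mask so the draw path can skip uniform re-emission when nothing the shader reads has changed. A sketch of the intended consumer; the emit side is not part of this diff, so the check below is an assumption modeled on the flag names:

/* Hypothetical draw-time helper: only rewrite the uniform stream when
 * state this shader actually depends on is dirty. */
static void
vc4_maybe_write_uniforms(struct vc4_context *vc4,
                         struct vc4_compiled_shader *shader,
                         struct vc4_constbuf_stateobj *cb,
                         struct vc4_texture_stateobj *texstate)
{
        if (vc4->dirty & shader->uniform_dirty_bits)
                vc4_write_uniforms(vc4, shader, cb, texstate);
}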